datafusion_optimizer/simplify_expressions/
expr_simplifier.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Expression simplification API
19
20use std::collections::HashSet;
21use std::ops::Not;
22use std::{borrow::Cow, sync::Arc};
23
24use arrow::{
25    array::{new_null_array, AsArray},
26    datatypes::{DataType, Field, Schema},
27    record_batch::RecordBatch,
28};
29
30use datafusion_common::tree_node::TreeNodeContainer;
31use datafusion_common::{
32    cast::{as_large_list_array, as_list_array},
33    metadata::FieldMetadata,
34    tree_node::{
35        Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
36    },
37};
38use datafusion_common::{
39    exec_datafusion_err, internal_err, DFSchema, DataFusionError, Result, ScalarValue,
40};
41use datafusion_expr::{
42    and, binary::BinaryTypeCoercer, lit, or, simplify::SimplifyContext, BinaryExpr, Case,
43    ColumnarValue, Expr, Like, Operator, Volatility,
44};
45use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
46use datafusion_expr::{
47    expr::{InList, InSubquery},
48    utils::{iter_conjunction, iter_conjunction_owned},
49};
50use datafusion_expr::{simplify::ExprSimplifyResult, Cast, TryCast};
51use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
52
53use super::inlist_simplifier::ShortenInListSimplifier;
54use super::utils::*;
55use crate::analyzer::type_coercion::TypeCoercionRewriter;
56use crate::simplify_expressions::guarantees::GuaranteeRewriter;
57use crate::simplify_expressions::regex::simplify_regex_expr;
58use crate::simplify_expressions::unwrap_cast::{
59    is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
60    is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
61    unwrap_cast_in_comparison_for_binary,
62};
63use crate::simplify_expressions::SimplifyInfo;
64use datafusion_expr_common::casts::try_cast_literal_to_type;
65use indexmap::IndexSet;
66use regex::Regex;
67
/// Entry point of the expression simplification API.
///
/// An `ExprSimplifier` pairs a provider of simplification information `S`
/// (for example a [`SimplifyContext`] built from a `DFSchema` and
/// [`ExecutionProps`], which is the default implementation used by
/// DataFusion) with a few options: value guarantees, whether to canonicalize
/// first, and the maximum number of simplification cycles.
///
/// For example:
/// ```
/// use arrow::datatypes::{DataType, Field, Schema};
/// use datafusion_common::{DataFusionError, ToDFSchema};
/// use datafusion_expr::execution_props::ExecutionProps;
/// use datafusion_expr::simplify::SimplifyContext;
/// use datafusion_expr::{col, lit};
/// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
///
/// // Create the schema
/// let schema = Schema::new(vec![Field::new("i", DataType::Int64, false)])
///     .to_dfschema_ref()
///     .unwrap();
///
/// // Create the simplifier
/// let props = ExecutionProps::new();
/// let context = SimplifyContext::new(&props).with_schema(schema);
/// let simplifier = ExprSimplifier::new(context);
///
/// // Use the simplifier
///
/// // b < 2 or (1 > 3)
/// let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
///
/// // b < 2
/// let simplified = simplifier.simplify(expr).unwrap();
/// assert_eq!(simplified, col("b").lt(lit(2)));
/// ```
pub struct ExprSimplifier<S> {
    /// Source of simplification information (execution properties, and the
    /// type / nullability lookups used by the rewriters)
    info: S,
    /// Guarantees about the values of columns. This is provided by the user
    /// in [ExprSimplifier::with_guarantees()]; empty by default.
    guarantees: Vec<(Expr, NullableInterval)>,
    /// Should expressions be canonicalized before simplification? Defaults to
    /// true; see [ExprSimplifier::with_canonicalize()].
    canonicalize: bool,
    /// Maximum number of simplifier cycles. Defaults to
    /// [DEFAULT_MAX_SIMPLIFIER_CYCLES]; see [ExprSimplifier::with_max_cycles()].
    max_simplifier_cycles: u32,
}
112
/// Threshold related to inlining `IN` lists (judging by the name).
// NOTE(review): the use sites are outside this excerpt; presumably `IN`
// lists with at most this many items are expanded inline — confirm.
pub const THRESHOLD_INLINE_INLIST: usize = 3;
/// Cycle limit that [`ExprSimplifier::new`] installs when the caller does not
/// override it with [`ExprSimplifier::with_max_cycles`].
pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
115
116impl<S: SimplifyInfo> ExprSimplifier<S> {
117    /// Create a new `ExprSimplifier` with the given `info` such as an
118    /// instance of [`SimplifyContext`]. See
119    /// [`simplify`](Self::simplify) for an example.
120    ///
121    /// [`SimplifyContext`]: datafusion_expr::simplify::SimplifyContext
122    pub fn new(info: S) -> Self {
123        Self {
124            info,
125            guarantees: vec![],
126            canonicalize: true,
127            max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
128        }
129    }
130
131    /// Simplifies this [`Expr`] as much as possible, evaluating
132    /// constants and applying algebraic simplifications.
133    ///
134    /// The types of the expression must match what operators expect,
135    /// or else an error may occur trying to evaluate. See
136    /// [`coerce`](Self::coerce) for a function to help.
137    ///
138    /// # Example:
139    ///
140    /// `b > 2 AND b > 2`
141    ///
142    /// can be written to
143    ///
144    /// `b > 2`
145    ///
146    /// ```
147    /// use arrow::datatypes::DataType;
148    /// use datafusion_common::DFSchema;
149    /// use datafusion_common::Result;
150    /// use datafusion_expr::execution_props::ExecutionProps;
151    /// use datafusion_expr::simplify::SimplifyContext;
152    /// use datafusion_expr::simplify::SimplifyInfo;
153    /// use datafusion_expr::{col, lit, Expr};
154    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
155    /// use std::sync::Arc;
156    ///
157    /// /// Simple implementation that provides `Simplifier` the information it needs
158    /// /// See SimplifyContext for a structure that does this.
159    /// #[derive(Default)]
160    /// struct Info {
161    ///     execution_props: ExecutionProps,
162    /// };
163    ///
164    /// impl SimplifyInfo for Info {
165    ///     fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
166    ///         Ok(false)
167    ///     }
168    ///     fn nullable(&self, expr: &Expr) -> Result<bool> {
169    ///         Ok(true)
170    ///     }
171    ///     fn execution_props(&self) -> &ExecutionProps {
172    ///         &self.execution_props
173    ///     }
174    ///     fn get_data_type(&self, expr: &Expr) -> Result<DataType> {
175    ///         Ok(DataType::Int32)
176    ///     }
177    /// }
178    ///
179    /// // Create the simplifier
180    /// let simplifier = ExprSimplifier::new(Info::default());
181    ///
182    /// // b < 2
183    /// let b_lt_2 = col("b").gt(lit(2));
184    ///
185    /// // (b < 2) OR (b < 2)
186    /// let expr = b_lt_2.clone().or(b_lt_2.clone());
187    ///
188    /// // (b < 2) OR (b < 2) --> (b < 2)
189    /// let expr = simplifier.simplify(expr).unwrap();
190    /// assert_eq!(expr, b_lt_2);
191    /// ```
192    pub fn simplify(&self, expr: Expr) -> Result<Expr> {
193        Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
194    }
195
196    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
197    /// constants and applying algebraic simplifications. Additionally returns a `u32`
198    /// representing the number of simplification cycles performed, which can be useful for testing
199    /// optimizations.
200    ///
201    /// See [Self::simplify] for details and usage examples.
202    #[deprecated(
203        since = "48.0.0",
204        note = "Use `simplify_with_cycle_count_transformed` instead"
205    )]
206    #[allow(unused_mut)]
207    pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
208        let (transformed, cycle_count) =
209            self.simplify_with_cycle_count_transformed(expr)?;
210        Ok((transformed.data, cycle_count))
211    }
212
213    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
214    /// constants and applying algebraic simplifications. Additionally returns a `u32`
215    /// representing the number of simplification cycles performed, which can be useful for testing
216    /// optimizations.
217    ///
218    /// # Returns
219    ///
220    /// A tuple containing:
221    /// - The simplified expression wrapped in a `Transformed<Expr>` indicating if changes were made
222    /// - The number of simplification cycles that were performed
223    ///
224    /// See [Self::simplify] for details and usage examples.
225    pub fn simplify_with_cycle_count_transformed(
226        &self,
227        mut expr: Expr,
228    ) -> Result<(Transformed<Expr>, u32)> {
229        let mut simplifier = Simplifier::new(&self.info);
230        let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?;
231        let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
232        let mut guarantee_rewriter = GuaranteeRewriter::new(&self.guarantees);
233
234        if self.canonicalize {
235            expr = expr.rewrite(&mut Canonicalizer::new()).data()?
236        }
237
238        // Evaluating constants can enable new simplifications and
239        // simplifications can enable new constant evaluation
240        // see `Self::with_max_cycles`
241        let mut num_cycles = 0;
242        let mut has_transformed = false;
243        loop {
244            let Transformed {
245                data, transformed, ..
246            } = expr
247                .rewrite(&mut const_evaluator)?
248                .transform_data(|expr| expr.rewrite(&mut simplifier))?
249                .transform_data(|expr| expr.rewrite(&mut guarantee_rewriter))?;
250            expr = data;
251            num_cycles += 1;
252            // Track if any transformation occurred
253            has_transformed = has_transformed || transformed;
254            if !transformed || num_cycles >= self.max_simplifier_cycles {
255                break;
256            }
257        }
258        // shorten inlist should be started after other inlist rules are applied
259        expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
260        Ok((
261            Transformed::new_transformed(expr, has_transformed),
262            num_cycles,
263        ))
264    }
265
266    /// Apply type coercion to an [`Expr`] so that it can be
267    /// evaluated as a [`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
268    ///
269    /// See the [type coercion module](datafusion_expr::type_coercion)
270    /// documentation for more details on type coercion
271    pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
272        let mut expr_rewrite = TypeCoercionRewriter { schema };
273        expr.rewrite_with_schema(schema, &mut expr_rewrite).data()
274    }
275
276    /// Input guarantees about the values of columns.
277    ///
278    /// The guarantees can simplify expressions. For example, if a column `x` is
279    /// guaranteed to be `3`, then the expression `x > 1` can be replaced by the
280    /// literal `true`.
281    ///
282    /// The guarantees are provided as a `Vec<(Expr, NullableInterval)>`,
283    /// where the [Expr] is a column reference and the [NullableInterval]
284    /// is an interval representing the known possible values of that column.
285    ///
286    /// ```rust
287    /// use arrow::datatypes::{DataType, Field, Schema};
288    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
289    /// use datafusion_expr::execution_props::ExecutionProps;
290    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
291    /// use datafusion_expr::simplify::SimplifyContext;
292    /// use datafusion_expr::{col, lit, Expr};
293    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
294    ///
295    /// let schema = Schema::new(vec![
296    ///     Field::new("x", DataType::Int64, false),
297    ///     Field::new("y", DataType::UInt32, false),
298    ///     Field::new("z", DataType::Int64, false),
299    /// ])
300    /// .to_dfschema_ref()
301    /// .unwrap();
302    ///
303    /// // Create the simplifier
304    /// let props = ExecutionProps::new();
305    /// let context = SimplifyContext::new(&props).with_schema(schema);
306    ///
307    /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
308    /// let expr_x = col("x").gt_eq(lit(3_i64));
309    /// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
310    /// let expr_z = col("z").gt(lit(5_i64));
311    /// let expr = expr_x.and(expr_y).and(expr_z.clone());
312    ///
313    /// let guarantees = vec![
314    ///     // x ∈ [3, 5]
315    ///     (
316    ///         col("x"),
317    ///         NullableInterval::NotNull {
318    ///             values: Interval::make(Some(3_i64), Some(5_i64)).unwrap(),
319    ///         },
320    ///     ),
321    ///     // y = 3
322    ///     (
323    ///         col("y"),
324    ///         NullableInterval::from(ScalarValue::UInt32(Some(3))),
325    ///     ),
326    /// ];
327    /// let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees);
328    /// let output = simplifier.simplify(expr).unwrap();
329    /// // Expression becomes: true AND true AND (z > 5), which simplifies to
330    /// // z > 5.
331    /// assert_eq!(output, expr_z);
332    /// ```
333    pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
334        self.guarantees = guarantees;
335        self
336    }
337
338    /// Should `Canonicalizer` be applied before simplification?
339    ///
340    /// If true (the default), the expression will be rewritten to canonical
341    /// form before simplification. This is useful to ensure that the simplifier
342    /// can apply all possible simplifications.
343    ///
344    /// Some expressions, such as those in some Joins, can not be canonicalized
345    /// without changing their meaning. In these cases, canonicalization should
346    /// be disabled.
347    ///
348    /// ```rust
349    /// use arrow::datatypes::{DataType, Field, Schema};
350    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
351    /// use datafusion_expr::execution_props::ExecutionProps;
352    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
353    /// use datafusion_expr::simplify::SimplifyContext;
354    /// use datafusion_expr::{col, lit, Expr};
355    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
356    ///
357    /// let schema = Schema::new(vec![
358    ///     Field::new("a", DataType::Int64, false),
359    ///     Field::new("b", DataType::Int64, false),
360    ///     Field::new("c", DataType::Int64, false),
361    /// ])
362    /// .to_dfschema_ref()
363    /// .unwrap();
364    ///
365    /// // Create the simplifier
366    /// let props = ExecutionProps::new();
367    /// let context = SimplifyContext::new(&props).with_schema(schema);
368    /// let simplifier = ExprSimplifier::new(context);
369    ///
370    /// // Expression: a = c AND 1 = b
371    /// let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));
372    ///
373    /// // With canonicalization, the expression is rewritten to canonical form
374    /// // (though it is no simpler in this case):
375    /// let canonical = simplifier.simplify(expr.clone()).unwrap();
376    /// // Expression has been rewritten to: (c = a AND b = 1)
377    /// assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));
378    ///
379    /// // If canonicalization is disabled, the expression is not changed
380    /// let non_canonicalized = simplifier
381    ///     .with_canonicalize(false)
382    ///     .simplify(expr.clone())
383    ///     .unwrap();
384    ///
385    /// assert_eq!(non_canonicalized, expr);
386    /// ```
387    pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
388        self.canonicalize = canonicalize;
389        self
390    }
391
392    /// Specifies the maximum number of simplification cycles to run.
393    ///
394    /// The simplifier can perform multiple passes of simplification. This is
395    /// because the output of one simplification step can allow more optimizations
396    /// in another simplification step. For example, constant evaluation can allow more
397    /// expression simplifications, and expression simplifications can allow more constant
398    /// evaluations.
399    ///
400    /// This method specifies the maximum number of allowed iteration cycles before the simplifier
401    /// returns an [Expr] output. However, it does not always perform the maximum number of cycles.
402    /// The simplifier will attempt to detect when an [Expr] is unchanged by all the simplification
403    /// passes, and return early. This avoids wasting time on unnecessary [Expr] tree traversals.
404    ///
405    /// If no maximum is specified, the value of [DEFAULT_MAX_SIMPLIFIER_CYCLES] is used
406    /// instead.
407    ///
408    /// ```rust
409    /// use arrow::datatypes::{DataType, Field, Schema};
410    /// use datafusion_expr::{col, lit, Expr};
411    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
412    /// use datafusion_expr::execution_props::ExecutionProps;
413    /// use datafusion_expr::simplify::SimplifyContext;
414    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
415    ///
416    /// let schema = Schema::new(vec![
417    ///   Field::new("a", DataType::Int64, false),
418    ///   ])
419    ///   .to_dfschema_ref().unwrap();
420    ///
421    /// // Create the simplifier
422    /// let props = ExecutionProps::new();
423    /// let context = SimplifyContext::new(&props)
424    ///    .with_schema(schema);
425    /// let simplifier = ExprSimplifier::new(context);
426    ///
427    /// // Expression: a IS NOT NULL
428    /// let expr = col("a").is_not_null();
429    ///
430    /// // When using default maximum cycles, 2 cycles will be performed.
431    /// let (simplified_expr, count) = simplifier.simplify_with_cycle_count_transformed(expr.clone()).unwrap();
432    /// assert_eq!(simplified_expr.data, lit(true));
433    /// // 2 cycles were executed, but only 1 was needed
434    /// assert_eq!(count, 2);
435    ///
436    /// // Only 1 simplification pass is necessary here, so we can set the maximum cycles to 1.
437    /// let (simplified_expr, count) = simplifier.with_max_cycles(1).simplify_with_cycle_count_transformed(expr.clone()).unwrap();
438    /// // Expression has been rewritten to: (c = a AND b = 1)
439    /// assert_eq!(simplified_expr.data, lit(true));
440    /// // Only 1 cycle was executed
441    /// assert_eq!(count, 1);
442    /// ```
443    pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
444        self.max_simplifier_cycles = max_simplifier_cycles;
445        self
446    }
447}
448
/// Rewriter that brings `BinaryExpr`s into canonical form
///
/// * `<literal> <op> <col>` is rewritten to `<col> <op> <literal>`
/// * `<col1> <op> <col2>` is rewritten so that the column that sorts higher
///   ends up on the left (`a > b` would be canonicalized to `b < a`)
///
/// Whenever the operands trade places the operator is replaced by its
/// swapped counterpart, so the meaning of the comparison is preserved.
struct Canonicalizer {}

impl Canonicalizer {
    /// Create a canonicalizer; it carries no state
    fn new() -> Self {
        Canonicalizer {}
    }
}
462
463impl TreeNodeRewriter for Canonicalizer {
464    type Node = Expr;
465
466    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
467        let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
468            return Ok(Transformed::no(expr));
469        };
470        match (left.as_ref(), right.as_ref(), op.swap()) {
471            // <col1> <op> <col2>
472            (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
473                if right_col > left_col =>
474            {
475                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
476                    left: right,
477                    op: swapped_op,
478                    right: left,
479                })))
480            }
481            // <literal> <op> <col>
482            (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
483                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
484                    left: right,
485                    op: swapped_op,
486                    right: left,
487                })))
488            }
489            _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
490                left,
491                op,
492                right,
493            }))),
494        }
495    }
496}
497
#[allow(rustdoc::private_intra_doc_links)]
/// Partially evaluate `Expr`s so constant subtrees are evaluated at plan time.
///
/// Note it does not handle algebraic rewrites such as `(a or false)`
/// --> `a`, which is handled by [`Simplifier`]
struct ConstEvaluator<'a> {
    /// `can_evaluate` is used during the depth-first-search of the
    /// `Expr` tree to track if any siblings (or their descendants) were
    /// non evaluatable (e.g. had a column reference or volatile
    /// function)
    ///
    /// Specifically, `can_evaluate[N]` represents the state of
    /// traversal when we are N levels deep in the tree, one entry for
    /// this Expr and each of its parents. An entry is pushed on the way
    /// down and popped on the way back up.
    ///
    /// After visiting all children, if the top of `can_evaluate` is
    /// still true, there were no non evaluatable descendants, so this
    /// `Expr` can be evaluated
    can_evaluate: Vec<bool>,

    /// Execution properties handed to `create_physical_expr` when a
    /// constant subtree is evaluated
    execution_props: &'a ExecutionProps,
    /// Schema with a single dummy `Null` column, used to plan the
    /// physical expression
    input_schema: DFSchema,
    /// One-row batch matching `input_schema`, so that evaluation yields
    /// a single output value
    input_batch: RecordBatch,
}
522
#[allow(dead_code)]
/// The simplify result of ConstEvaluator
enum ConstSimplifyResult {
    // Expr was evaluated; holds the resulting scalar value and the
    // metadata of the result field, if any
    Simplified(ScalarValue, Option<FieldMetadata>),
    // Expr was already a literal; its value and metadata are handed back
    // unchanged
    NotSimplified(ScalarValue, Option<FieldMetadata>),
    // Evaluation encountered an error, contains the error and the original
    // expression
    SimplifyRuntimeError(DataFusionError, Expr),
}
533
534impl TreeNodeRewriter for ConstEvaluator<'_> {
535    type Node = Expr;
536
537    fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
538        // Default to being able to evaluate this node
539        self.can_evaluate.push(true);
540
541        // if this expr is not ok to evaluate, mark entire parent
542        // stack as not ok (as all parents have at least one child or
543        // descendant that can not be evaluated
544
545        if !Self::can_evaluate(&expr) {
546            // walk back up stack, marking first parent that is not mutable
547            let parent_iter = self.can_evaluate.iter_mut().rev();
548            for p in parent_iter {
549                if !*p {
550                    // optimization: if we find an element on the
551                    // stack already marked, know all elements above are also marked
552                    break;
553                }
554                *p = false;
555            }
556        }
557
558        // NB: do not short circuit recursion even if we find a non
559        // evaluatable node (so we can fold other children, args to
560        // functions, etc.)
561        Ok(Transformed::no(expr))
562    }
563
564    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
565        match self.can_evaluate.pop() {
566            // Certain expressions such as `CASE` and `COALESCE` are short-circuiting
567            // and may not evaluate all their sub expressions. Thus, if
568            // any error is countered during simplification, return the original
569            // so that normal evaluation can occur
570            Some(true) => match self.evaluate_to_scalar(expr) {
571                ConstSimplifyResult::Simplified(s, m) => {
572                    Ok(Transformed::yes(Expr::Literal(s, m)))
573                }
574                ConstSimplifyResult::NotSimplified(s, m) => {
575                    Ok(Transformed::no(Expr::Literal(s, m)))
576                }
577                ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
578                    // For CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time.
579                    // This provides clearer error messages and fails fast.
580                    if let Expr::Cast(Cast { ref expr, .. })
581                    | Expr::TryCast(TryCast { ref expr, .. }) = expr
582                    {
583                        if matches!(expr.as_ref(), Expr::Literal(_, _)) {
584                            return Err(err);
585                        }
586                    }
587                    // For other expressions (like CASE, COALESCE), preserve the original
588                    // to allow short-circuit evaluation at execution time
589                    Ok(Transformed::yes(expr))
590                }
591            },
592            Some(false) => Ok(Transformed::no(expr)),
593            _ => internal_err!("Failed to pop can_evaluate"),
594        }
595    }
596}
597
598impl<'a> ConstEvaluator<'a> {
599    /// Create a new `ConstantEvaluator`. Session constants (such as
600    /// the time for `now()` are taken from the passed
601    /// `execution_props`.
602    pub fn try_new(execution_props: &'a ExecutionProps) -> Result<Self> {
603        // The dummy column name is unused and doesn't matter as only
604        // expressions without column references can be evaluated
605        static DUMMY_COL_NAME: &str = ".";
606        let schema = Arc::new(Schema::new(vec![Field::new(
607            DUMMY_COL_NAME,
608            DataType::Null,
609            true,
610        )]));
611        let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
612        // Need a single "input" row to produce a single output row
613        let col = new_null_array(&DataType::Null, 1);
614        let input_batch = RecordBatch::try_new(schema, vec![col])?;
615
616        Ok(Self {
617            can_evaluate: vec![],
618            execution_props,
619            input_schema,
620            input_batch,
621        })
622    }
623
624    /// Can a function of the specified volatility be evaluated?
625    fn volatility_ok(volatility: Volatility) -> bool {
626        match volatility {
627            Volatility::Immutable => true,
628            // Values for functions such as now() are taken from ExecutionProps
629            Volatility::Stable => true,
630            Volatility::Volatile => false,
631        }
632    }
633
634    /// Can the expression be evaluated at plan time, (assuming all of
635    /// its children can also be evaluated)?
636    fn can_evaluate(expr: &Expr) -> bool {
637        // check for reasons we can't evaluate this node
638        //
639        // NOTE all expr types are listed here so when new ones are
640        // added they can be checked for their ability to be evaluated
641        // at plan time
642        match expr {
643            // TODO: remove the next line after `Expr::Wildcard` is removed
644            #[expect(deprecated)]
645            Expr::AggregateFunction { .. }
646            | Expr::ScalarVariable(_, _)
647            | Expr::Column(_)
648            | Expr::OuterReferenceColumn(_, _)
649            | Expr::Exists { .. }
650            | Expr::InSubquery(_)
651            | Expr::ScalarSubquery(_)
652            | Expr::WindowFunction { .. }
653            | Expr::GroupingSet(_)
654            | Expr::Wildcard { .. }
655            | Expr::Placeholder(_)
656            | Expr::Lambda { .. } => false,
657            Expr::ScalarFunction(ScalarFunction { func, .. }) => {
658                Self::volatility_ok(func.signature().volatility)
659            }
660            Expr::Literal(_, _)
661            | Expr::Alias(..)
662            | Expr::Unnest(_)
663            | Expr::BinaryExpr { .. }
664            | Expr::Not(_)
665            | Expr::IsNotNull(_)
666            | Expr::IsNull(_)
667            | Expr::IsTrue(_)
668            | Expr::IsFalse(_)
669            | Expr::IsUnknown(_)
670            | Expr::IsNotTrue(_)
671            | Expr::IsNotFalse(_)
672            | Expr::IsNotUnknown(_)
673            | Expr::Negative(_)
674            | Expr::Between { .. }
675            | Expr::Like { .. }
676            | Expr::SimilarTo { .. }
677            | Expr::Case(_)
678            | Expr::Cast { .. }
679            | Expr::TryCast { .. }
680            | Expr::InList { .. } => true,
681        }
682    }
683
684    /// Internal helper to evaluates an Expr
685    pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
686        if let Expr::Literal(s, m) = expr {
687            return ConstSimplifyResult::NotSimplified(s, m);
688        }
689
690        let phys_expr =
691            match create_physical_expr(&expr, &self.input_schema, self.execution_props) {
692                Ok(e) => e,
693                Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
694            };
695        let metadata = phys_expr
696            .return_field(self.input_batch.schema_ref())
697            .ok()
698            .and_then(|f| {
699                let m = f.metadata();
700                match m.is_empty() {
701                    true => None,
702                    false => Some(FieldMetadata::from(m)),
703                }
704            });
705        let col_val = match phys_expr.evaluate(&self.input_batch) {
706            Ok(v) => v,
707            Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
708        };
709        match col_val {
710            ColumnarValue::Array(a) => {
711                if a.len() != 1 {
712                    ConstSimplifyResult::SimplifyRuntimeError(
713                        exec_datafusion_err!("Could not evaluate the expression, found a result of length {}", a.len()),
714                        expr,
715                    )
716                } else if as_list_array(&a).is_ok() {
717                    ConstSimplifyResult::Simplified(
718                        ScalarValue::List(a.as_list::<i32>().to_owned().into()),
719                        metadata,
720                    )
721                } else if as_large_list_array(&a).is_ok() {
722                    ConstSimplifyResult::Simplified(
723                        ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
724                        metadata,
725                    )
726                } else {
727                    // Non-ListArray
728                    match ScalarValue::try_from_array(&a, 0) {
729                        Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
730                        Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
731                    }
732                }
733            }
734            ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
735        }
736    }
737}
738
/// Rewriter that simplifies [`Expr`]s through algebraic transformation rules
///
/// Example transformations that are applied:
/// * `expr = true` and `expr != false` to `expr` when `expr` is of boolean type
/// * `expr = false` and `expr != true` to `!expr` when `expr` is of boolean type
/// * `true = true` and `false = false` to `true`
/// * `false = true` and `true = false` to `false`
/// * `!!expr` to `expr`
/// * `expr = null` and `expr != null` to `null`
struct Simplifier<'a, S> {
    /// Borrowed provider of simplification information
    info: &'a S,
}

impl<'a, S> Simplifier<'a, S> {
    /// Create a simplifier that consults `info`
    pub fn new(info: &'a S) -> Self {
        Simplifier { info }
    }
}
757
758impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
759    type Node = Expr;
760
761    fn f_down(&mut self, expr: Self::Node) -> Result<Transformed<Expr>> {
762        match expr {
763            Expr::ScalarFunction(ScalarFunction { func, args })
764                if args.iter().any(|arg| matches!(arg, Expr::Lambda(_))) =>
765            {
766                // there's currently no way to adapt a generic SimplifyInfo with lambda parameters,
767                // so, if the scalar function has any lambda, we materialize a DFSchema using all the
768                // columns references in every arguments. Than we can call lambdas_schemas_from_args,
769                // and for each argument, we create a new SimplifyContext with the scoped schema, and
770                // simplify the argument using this 'sub-context'. Finally, we set Transformed.tnr to
771                // Jump so the parent context doesn't try to simplify the argument again, without the
772                // parameters info
773
774                // get all columns references
775                let mut columns_refs = HashSet::new();
776
777                for arg in &args {
778                    arg.add_column_refs(&mut columns_refs);
779                }
780
781                // materialize columns references into qualified fields
782                let qualified_fields = columns_refs
783                    .into_iter()
784                    .map(|captured_column| {
785                        let expr = Expr::Column(captured_column.clone());
786
787                        Ok((
788                            captured_column.relation.clone(),
789                            Arc::new(Field::new(
790                                captured_column.name(),
791                                self.info.get_data_type(&expr)?,
792                                self.info.nullable(&expr)?,
793                            )),
794                        ))
795                    })
796                    .collect::<Result<_>>()?;
797
798                // create a schema using the materialized fields
799                let dfschema =
800                    DFSchema::new_with_metadata(qualified_fields, Default::default())?;
801
802                let mut scoped_schemas = func
803                    .arguments_schema_from_logical_args(&args, &dfschema)?
804                    .into_iter();
805
806                let transformed_args = args
807                    .map_elements(|arg| {
808                        let scoped_schema = scoped_schemas.next().unwrap();
809
810                        // create a sub-context, using the scoped schema, that includes information about the lambda parameters
811                        let simplify_context =
812                            SimplifyContext::new(self.info.execution_props())
813                                .with_schema(Arc::new(scoped_schema.into_owned()));
814
815                        let mut simplifier = Simplifier::new(&simplify_context);
816
817                        // simplify the argument using it's context
818                        arg.rewrite(&mut simplifier)
819                    })?
820                    .update_data(|args| {
821                        Expr::ScalarFunction(ScalarFunction { func, args })
822                    });
823
824                Ok(Transformed::new(
825                    transformed_args.data,
826                    transformed_args.transformed,
827                    // return at least Jump so the parent contex doesn't try again to simplify the arguments
828                    // (and fail because it doesn't contain info about lambdas paramters)
829                    match transformed_args.tnr {
830                        TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => {
831                            TreeNodeRecursion::Jump
832                        }
833                        TreeNodeRecursion::Stop => TreeNodeRecursion::Stop,
834                    },
835                ))
836
837                // Ok(transformed_args.update_data(|args| Expr::ScalarFunction(ScalarFunction { func, args})))
838            }
839            // Expr::Lambda(_) => Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)),
840            _ => Ok(Transformed::no(expr)),
841        }
842    }
843
844    /// rewrite the expression simplifying any constant expressions
845    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
846        use datafusion_expr::Operator::{
847            And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
848            Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
849            RegexNotIMatch, RegexNotMatch,
850        };
851
852        let info = self.info;
853        Ok(match expr {
854            // `value op NULL` -> `NULL`
855            // `NULL op value` -> `NULL`
856            // except for few operators that can return non-null value even when one of the operands is NULL
857            ref expr @ Expr::BinaryExpr(BinaryExpr {
858                ref left,
859                ref op,
860                ref right,
861            }) if op.returns_null_on_null()
862                && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
863            {
864                Transformed::yes(Expr::Literal(
865                    ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
866                    None,
867                ))
868            }
869
870            // `NULL {AND, OR} NULL` -> `NULL`
871            Expr::BinaryExpr(BinaryExpr {
872                left,
873                op: And | Or,
874                right,
875            }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
876
877            //
878            // Rules for Eq
879            //
880
881            // true = A  --> A
882            // false = A --> !A
883            // null = A --> null
884            Expr::BinaryExpr(BinaryExpr {
885                left,
886                op: Eq,
887                right,
888            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
889                Transformed::yes(match as_bool_lit(&left)? {
890                    Some(true) => *right,
891                    Some(false) => Expr::Not(right),
892                    None => lit_bool_null(),
893                })
894            }
895            // A = true  --> A
896            // A = false --> !A
897            // A = null --> null
898            Expr::BinaryExpr(BinaryExpr {
899                left,
900                op: Eq,
901                right,
902            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
903                Transformed::yes(match as_bool_lit(&right)? {
904                    Some(true) => *left,
905                    Some(false) => Expr::Not(left),
906                    None => lit_bool_null(),
907                })
908            }
909            // According to SQL's null semantics, NULL = NULL evaluates to NULL
910            // Both sides are the same expression (A = A) and A is non-volatile expression
911            // A = A --> A IS NOT NULL OR NULL
912            // A = A --> true (if A not nullable)
913            Expr::BinaryExpr(BinaryExpr {
914                left,
915                op: Eq,
916                right,
917            }) if (left == right) & !left.is_volatile() => {
918                Transformed::yes(match !info.nullable(&left)? {
919                    true => lit(true),
920                    false => Expr::BinaryExpr(BinaryExpr {
921                        left: Box::new(Expr::IsNotNull(left)),
922                        op: Or,
923                        right: Box::new(lit_bool_null()),
924                    }),
925                })
926            }
927
928            // Rules for NotEq
929            //
930
931            // true != A  --> !A
932            // false != A --> A
933            // null != A --> null
934            Expr::BinaryExpr(BinaryExpr {
935                left,
936                op: NotEq,
937                right,
938            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
939                Transformed::yes(match as_bool_lit(&left)? {
940                    Some(true) => Expr::Not(right),
941                    Some(false) => *right,
942                    None => lit_bool_null(),
943                })
944            }
945            // A != true  --> !A
946            // A != false --> A
947            // A != null --> null,
948            Expr::BinaryExpr(BinaryExpr {
949                left,
950                op: NotEq,
951                right,
952            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
953                Transformed::yes(match as_bool_lit(&right)? {
954                    Some(true) => Expr::Not(left),
955                    Some(false) => *left,
956                    None => lit_bool_null(),
957                })
958            }
959
960            //
961            // Rules for OR
962            //
963
964            // true OR A --> true (even if A is null)
965            Expr::BinaryExpr(BinaryExpr {
966                left,
967                op: Or,
968                right: _,
969            }) if is_true(&left) => Transformed::yes(*left),
970            // false OR A --> A
971            Expr::BinaryExpr(BinaryExpr {
972                left,
973                op: Or,
974                right,
975            }) if is_false(&left) => Transformed::yes(*right),
976            // A OR true --> true (even if A is null)
977            Expr::BinaryExpr(BinaryExpr {
978                left: _,
979                op: Or,
980                right,
981            }) if is_true(&right) => Transformed::yes(*right),
982            // A OR false --> A
983            Expr::BinaryExpr(BinaryExpr {
984                left,
985                op: Or,
986                right,
987            }) if is_false(&right) => Transformed::yes(*left),
988            // A OR !A ---> true (if A not nullable)
989            Expr::BinaryExpr(BinaryExpr {
990                left,
991                op: Or,
992                right,
993            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
994                Transformed::yes(lit(true))
995            }
996            // !A OR A ---> true (if A not nullable)
997            Expr::BinaryExpr(BinaryExpr {
998                left,
999                op: Or,
1000                right,
1001            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1002                Transformed::yes(lit(true))
1003            }
1004            // (..A..) OR A --> (..A..)
1005            Expr::BinaryExpr(BinaryExpr {
1006                left,
1007                op: Or,
1008                right,
1009            }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
1010            // A OR (..A..) --> (..A..)
1011            Expr::BinaryExpr(BinaryExpr {
1012                left,
1013                op: Or,
1014                right,
1015            }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
1016            // A OR (A AND B) --> A
1017            Expr::BinaryExpr(BinaryExpr {
1018                left,
1019                op: Or,
1020                right,
1021            }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
1022            // (A AND B) OR A --> A
1023            Expr::BinaryExpr(BinaryExpr {
1024                left,
1025                op: Or,
1026                right,
1027            }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
1028            // Eliminate common factors in conjunctions e.g
1029            // (A AND B) OR (A AND C) -> A AND (B OR C)
1030            Expr::BinaryExpr(BinaryExpr {
1031                left,
1032                op: Or,
1033                right,
1034            }) if has_common_conjunction(&left, &right) => {
1035                let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
1036                let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
1037                    .partition(|e| lhs.contains(e) && !e.is_volatile());
1038
1039                let new_rhs = rhs.into_iter().reduce(and);
1040                let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
1041                let common_conjunction = common.into_iter().reduce(and).unwrap();
1042
1043                let new_expr = match (new_lhs, new_rhs) {
1044                    (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
1045                    (_, _) => common_conjunction,
1046                };
1047                Transformed::yes(new_expr)
1048            }
1049
1050            //
1051            // Rules for AND
1052            //
1053
1054            // true AND A --> A
1055            Expr::BinaryExpr(BinaryExpr {
1056                left,
1057                op: And,
1058                right,
1059            }) if is_true(&left) => Transformed::yes(*right),
1060            // false AND A --> false (even if A is null)
1061            Expr::BinaryExpr(BinaryExpr {
1062                left,
1063                op: And,
1064                right: _,
1065            }) if is_false(&left) => Transformed::yes(*left),
1066            // A AND true --> A
1067            Expr::BinaryExpr(BinaryExpr {
1068                left,
1069                op: And,
1070                right,
1071            }) if is_true(&right) => Transformed::yes(*left),
1072            // A AND false --> false (even if A is null)
1073            Expr::BinaryExpr(BinaryExpr {
1074                left: _,
1075                op: And,
1076                right,
1077            }) if is_false(&right) => Transformed::yes(*right),
1078            // A AND !A ---> false (if A not nullable)
1079            Expr::BinaryExpr(BinaryExpr {
1080                left,
1081                op: And,
1082                right,
1083            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1084                Transformed::yes(lit(false))
1085            }
1086            // !A AND A ---> false (if A not nullable)
1087            Expr::BinaryExpr(BinaryExpr {
1088                left,
1089                op: And,
1090                right,
1091            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1092                Transformed::yes(lit(false))
1093            }
1094            // (..A..) AND A --> (..A..)
1095            Expr::BinaryExpr(BinaryExpr {
1096                left,
1097                op: And,
1098                right,
1099            }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1100            // A AND (..A..) --> (..A..)
1101            Expr::BinaryExpr(BinaryExpr {
1102                left,
1103                op: And,
1104                right,
1105            }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1106            // A AND (A OR B) --> A
1107            Expr::BinaryExpr(BinaryExpr {
1108                left,
1109                op: And,
1110                right,
1111            }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1112            // (A OR B) AND A --> A
1113            Expr::BinaryExpr(BinaryExpr {
1114                left,
1115                op: And,
1116                right,
1117            }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1118            // A >= constant AND constant <= A --> A = constant
1119            Expr::BinaryExpr(BinaryExpr {
1120                left,
1121                op: And,
1122                right,
1123            }) if can_reduce_to_equal_statement(&left, &right) => {
1124                if let Expr::BinaryExpr(BinaryExpr {
1125                    left: left_left,
1126                    right: left_right,
1127                    ..
1128                }) = *left
1129                {
1130                    Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1131                        left: left_left,
1132                        op: Eq,
1133                        right: left_right,
1134                    }))
1135                } else {
1136                    return internal_err!("can_reduce_to_equal_statement should only be called with a BinaryExpr");
1137                }
1138            }
1139
1140            //
1141            // Rules for Multiply
1142            //
1143
1144            // A * 1 --> A (with type coercion if needed)
1145            Expr::BinaryExpr(BinaryExpr {
1146                left,
1147                op: Multiply,
1148                right,
1149            }) if is_one(&right) => {
1150                simplify_right_is_one_case(info, left, &Multiply, &right)?
1151            }
1152            // 1 * A --> A
1153            Expr::BinaryExpr(BinaryExpr {
1154                left,
1155                op: Multiply,
1156                right,
1157            }) if is_one(&left) => {
1158                // 1 * A is equivalent to A * 1
1159                simplify_right_is_one_case(info, right, &Multiply, &left)?
1160            }
1161
1162            // A * 0 --> 0 (if A is not null and not floating, since NAN * 0 -> NAN)
1163            Expr::BinaryExpr(BinaryExpr {
1164                left,
1165                op: Multiply,
1166                right,
1167            }) if !info.nullable(&left)?
1168                && !info.get_data_type(&left)?.is_floating()
1169                && is_zero(&right) =>
1170            {
1171                Transformed::yes(*right)
1172            }
1173            // 0 * A --> 0 (if A is not null and not floating, since 0 * NAN -> NAN)
1174            Expr::BinaryExpr(BinaryExpr {
1175                left,
1176                op: Multiply,
1177                right,
1178            }) if !info.nullable(&right)?
1179                && !info.get_data_type(&right)?.is_floating()
1180                && is_zero(&left) =>
1181            {
1182                Transformed::yes(*left)
1183            }
1184
1185            //
1186            // Rules for Divide
1187            //
1188
1189            // A / 1 --> A
1190            Expr::BinaryExpr(BinaryExpr {
1191                left,
1192                op: Divide,
1193                right,
1194            }) if is_one(&right) => {
1195                simplify_right_is_one_case(info, left, &Divide, &right)?
1196            }
1197
1198            //
1199            // Rules for Modulo
1200            //
1201
1202            // A % 1 --> 0 (if A is not nullable and not floating, since NAN % 1 --> NAN)
1203            Expr::BinaryExpr(BinaryExpr {
1204                left,
1205                op: Modulo,
1206                right,
1207            }) if !info.nullable(&left)?
1208                && !info.get_data_type(&left)?.is_floating()
1209                && is_one(&right) =>
1210            {
1211                Transformed::yes(Expr::Literal(
1212                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1213                    None,
1214                ))
1215            }
1216
1217            //
1218            // Rules for BitwiseAnd
1219            //
1220
1221            // A & 0 -> 0 (if A not nullable)
1222            Expr::BinaryExpr(BinaryExpr {
1223                left,
1224                op: BitwiseAnd,
1225                right,
1226            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1227
1228            // 0 & A -> 0 (if A not nullable)
1229            Expr::BinaryExpr(BinaryExpr {
1230                left,
1231                op: BitwiseAnd,
1232                right,
1233            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1234
1235            // !A & A -> 0 (if A not nullable)
1236            Expr::BinaryExpr(BinaryExpr {
1237                left,
1238                op: BitwiseAnd,
1239                right,
1240            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1241                Transformed::yes(Expr::Literal(
1242                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1243                    None,
1244                ))
1245            }
1246
1247            // A & !A -> 0 (if A not nullable)
1248            Expr::BinaryExpr(BinaryExpr {
1249                left,
1250                op: BitwiseAnd,
1251                right,
1252            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1253                Transformed::yes(Expr::Literal(
1254                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1255                    None,
1256                ))
1257            }
1258
1259            // (..A..) & A --> (..A..)
1260            Expr::BinaryExpr(BinaryExpr {
1261                left,
1262                op: BitwiseAnd,
1263                right,
1264            }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1265
1266            // A & (..A..) --> (..A..)
1267            Expr::BinaryExpr(BinaryExpr {
1268                left,
1269                op: BitwiseAnd,
1270                right,
1271            }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1272
1273            // A & (A | B) --> A (if B not null)
1274            Expr::BinaryExpr(BinaryExpr {
1275                left,
1276                op: BitwiseAnd,
1277                right,
1278            }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1279                Transformed::yes(*left)
1280            }
1281
1282            // (A | B) & A --> A (if B not null)
1283            Expr::BinaryExpr(BinaryExpr {
1284                left,
1285                op: BitwiseAnd,
1286                right,
1287            }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1288                Transformed::yes(*right)
1289            }
1290
1291            //
1292            // Rules for BitwiseOr
1293            //
1294
1295            // A | 0 -> A (even if A is null)
1296            Expr::BinaryExpr(BinaryExpr {
1297                left,
1298                op: BitwiseOr,
1299                right,
1300            }) if is_zero(&right) => Transformed::yes(*left),
1301
1302            // 0 | A -> A (even if A is null)
1303            Expr::BinaryExpr(BinaryExpr {
1304                left,
1305                op: BitwiseOr,
1306                right,
1307            }) if is_zero(&left) => Transformed::yes(*right),
1308
1309            // !A | A -> -1 (if A not nullable)
1310            Expr::BinaryExpr(BinaryExpr {
1311                left,
1312                op: BitwiseOr,
1313                right,
1314            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1315                Transformed::yes(Expr::Literal(
1316                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1317                    None,
1318                ))
1319            }
1320
1321            // A | !A -> -1 (if A not nullable)
1322            Expr::BinaryExpr(BinaryExpr {
1323                left,
1324                op: BitwiseOr,
1325                right,
1326            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1327                Transformed::yes(Expr::Literal(
1328                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1329                    None,
1330                ))
1331            }
1332
1333            // (..A..) | A --> (..A..)
1334            Expr::BinaryExpr(BinaryExpr {
1335                left,
1336                op: BitwiseOr,
1337                right,
1338            }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1339
1340            // A | (..A..) --> (..A..)
1341            Expr::BinaryExpr(BinaryExpr {
1342                left,
1343                op: BitwiseOr,
1344                right,
1345            }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1346
1347            // A | (A & B) --> A (if B not null)
1348            Expr::BinaryExpr(BinaryExpr {
1349                left,
1350                op: BitwiseOr,
1351                right,
1352            }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1353                Transformed::yes(*left)
1354            }
1355
1356            // (A & B) | A --> A (if B not null)
1357            Expr::BinaryExpr(BinaryExpr {
1358                left,
1359                op: BitwiseOr,
1360                right,
1361            }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1362                Transformed::yes(*right)
1363            }
1364
1365            //
1366            // Rules for BitwiseXor
1367            //
1368
1369            // A ^ 0 -> A (if A not nullable)
1370            Expr::BinaryExpr(BinaryExpr {
1371                left,
1372                op: BitwiseXor,
1373                right,
1374            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1375
1376            // 0 ^ A -> A (if A not nullable)
1377            Expr::BinaryExpr(BinaryExpr {
1378                left,
1379                op: BitwiseXor,
1380                right,
1381            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1382
1383            // !A ^ A -> -1 (if A not nullable)
1384            Expr::BinaryExpr(BinaryExpr {
1385                left,
1386                op: BitwiseXor,
1387                right,
1388            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1389                Transformed::yes(Expr::Literal(
1390                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1391                    None,
1392                ))
1393            }
1394
1395            // A ^ !A -> -1 (if A not nullable)
1396            Expr::BinaryExpr(BinaryExpr {
1397                left,
1398                op: BitwiseXor,
1399                right,
1400            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1401                Transformed::yes(Expr::Literal(
1402                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1403                    None,
1404                ))
1405            }
1406
1407            // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A)
1408            Expr::BinaryExpr(BinaryExpr {
1409                left,
1410                op: BitwiseXor,
1411                right,
1412            }) if expr_contains(&left, &right, BitwiseXor) => {
1413                let expr = delete_xor_in_complex_expr(&left, &right, false);
1414                Transformed::yes(if expr == *right {
1415                    Expr::Literal(
1416                        ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1417                        None,
1418                    )
1419                } else {
1420                    expr
1421                })
1422            }
1423
1424            // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A)
1425            Expr::BinaryExpr(BinaryExpr {
1426                left,
1427                op: BitwiseXor,
1428                right,
1429            }) if expr_contains(&right, &left, BitwiseXor) => {
1430                let expr = delete_xor_in_complex_expr(&right, &left, true);
1431                Transformed::yes(if expr == *left {
1432                    Expr::Literal(
1433                        ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1434                        None,
1435                    )
1436                } else {
1437                    expr
1438                })
1439            }
1440
1441            //
1442            // Rules for BitwiseShiftRight
1443            //
1444
1445            // A >> 0 -> A (even if A is null)
1446            Expr::BinaryExpr(BinaryExpr {
1447                left,
1448                op: BitwiseShiftRight,
1449                right,
1450            }) if is_zero(&right) => Transformed::yes(*left),
1451
1452            //
            // Rules for BitwiseShiftLeft
1454            //
1455
1456            // A << 0 -> A (even if A is null)
1457            Expr::BinaryExpr(BinaryExpr {
1458                left,
1459                op: BitwiseShiftLeft,
1460                right,
1461            }) if is_zero(&right) => Transformed::yes(*left),
1462
1463            //
1464            // Rules for Not
1465            //
1466            Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1467
1468            //
1469            // Rules for Negative
1470            //
1471            Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1472
1473            //
1474            // Rules for Case
1475            //
1476
1477            // Inline a comparison to a literal with the case statement into the `THEN` clauses.
1478            // which can enable further simplifications
1479            // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
1480            Expr::BinaryExpr(BinaryExpr {
1481                left,
1482                op: op @ (Eq | NotEq),
1483                right,
1484            }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1485                let case = into_case(*left)?;
1486                Transformed::yes(Expr::Case(Case {
1487                    expr: None,
1488                    when_then_expr: case
1489                        .when_then_expr
1490                        .into_iter()
1491                        .map(|(when, then)| {
1492                            (
1493                                when,
1494                                Box::new(Expr::BinaryExpr(BinaryExpr {
1495                                    left: then,
1496                                    op,
1497                                    right: right.clone(),
1498                                })),
1499                            )
1500                        })
1501                        .collect(),
1502                    else_expr: case.else_expr.map(|els| {
1503                        Box::new(Expr::BinaryExpr(BinaryExpr {
1504                            left: els,
1505                            op,
1506                            right,
1507                        }))
1508                    }),
1509                }))
1510            }
1511
1512            // CASE WHEN true THEN A ... END --> A
1513            // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1514            // CASE WHEN false THEN A END --> NULL
1515            // CASE WHEN false THEN A ELSE B END --> B
1516            // CASE WHEN X THEN A WHEN false THEN B END --> CASE WHEN X THEN A ELSE B END
1517            Expr::Case(Case {
1518                expr: None,
1519                when_then_expr,
1520                mut else_expr,
1521            }) if when_then_expr
1522                .iter()
1523                .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1524            {
1525                let out_type = info.get_data_type(&when_then_expr[0].1)?;
1526                let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1527
1528                for (when, then) in when_then_expr.into_iter() {
1529                    if is_true(when.as_ref()) {
1530                        // Skip adding the rest of the when-then expressions after WHEN true
1531                        // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1532                        else_expr = Some(then);
1533                        break;
1534                    } else if !is_false(when.as_ref()) {
1535                        new_when_then_expr.push((when, then));
1536                    }
1537                    // else: skip WHEN false cases
1538                }
1539
1540                // Exclude CASE statement altogether if there are no when-then expressions left
1541                if new_when_then_expr.is_empty() {
1542                    // CASE WHEN false THEN A ELSE B END --> B
1543                    if let Some(else_expr) = else_expr {
1544                        return Ok(Transformed::yes(*else_expr));
1545                    // CASE WHEN false THEN A END --> NULL
1546                    } else {
1547                        let null =
1548                            Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1549                        return Ok(Transformed::yes(null));
1550                    }
1551                }
1552
1553                Transformed::yes(Expr::Case(Case {
1554                    expr: None,
1555                    when_then_expr: new_when_then_expr,
1556                    else_expr,
1557                }))
1558            }
1559
1560            // CASE
1561            //   WHEN X THEN A
1562            //   WHEN Y THEN B
1563            //   ...
1564            //   ELSE Q
1565            // END
1566            //
1567            // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
1568            //
1569            // Note: the rationale for this rewrite is that the expr can then be further
1570            // simplified using the existing rules for AND/OR
1571            Expr::Case(Case {
1572                expr: None,
1573                when_then_expr,
1574                else_expr,
1575            }) if !when_then_expr.is_empty()
1576                // The rewrite is O(n²) in general so limit to small number of when-thens that can be true
1577                && (when_then_expr.len() < 3 // small number of input whens
1578                    // or all thens are literal bools and a small number of them are true
1579                    || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1580                        && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1581                && info.is_boolean_type(&when_then_expr[0].1)? =>
1582            {
                // The disjunction of all the when predicates encountered so far. Not nullable.
1584                let mut filter_expr = lit(false);
1585                // The disjunction of all the cases
1586                let mut out_expr = lit(false);
1587
1588                for (when, then) in when_then_expr {
1589                    let when = is_exactly_true(*when, info)?;
1590                    let case_expr =
1591                        when.clone().and(filter_expr.clone().not()).and(*then);
1592
1593                    out_expr = out_expr.or(case_expr);
1594                    filter_expr = filter_expr.or(when);
1595                }
1596
1597                let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1598                let case_expr = filter_expr.not().and(else_expr);
1599                out_expr = out_expr.or(case_expr);
1600
1601                // Do a first pass at simplification
1602                out_expr.rewrite(self)?
1603            }
1604            // CASE
1605            //   WHEN X THEN true
1606            //   WHEN Y THEN true
1607            //   WHEN Z THEN false
1608            //   ...
1609            //   ELSE true
1610            // END
1611            //
1612            // --->
1613            //
1614            // NOT(CASE
1615            //   WHEN X THEN false
1616            //   WHEN Y THEN false
1617            //   WHEN Z THEN true
1618            //   ...
1619            //   ELSE false
1620            // END)
1621            //
1622            // Note: the rationale for this rewrite is that the case can then be further
1623            // simplified into a small number of ANDs and ORs
1624            Expr::Case(Case {
1625                expr: None,
1626                when_then_expr,
1627                else_expr,
1628            }) if !when_then_expr.is_empty()
1629                && when_then_expr
1630                    .iter()
1631                    .all(|(_, then)| is_bool_lit(then)) // all thens are literal bools
1632                // This simplification is only helpful if we end up with a small number of true thens
1633                && when_then_expr
1634                    .iter()
1635                    .filter(|(_, then)| is_false(then))
1636                    .count()
1637                    < 3
1638                && else_expr.as_deref().is_none_or(is_bool_lit) =>
1639            {
1640                Transformed::yes(
1641                    Expr::Case(Case {
1642                        expr: None,
1643                        when_then_expr: when_then_expr
1644                            .into_iter()
1645                            .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1646                            .collect(),
1647                        else_expr: else_expr
1648                            .map(|else_expr| Box::new(Expr::Not(else_expr))),
1649                    })
1650                    .not(),
1651                )
1652            }
1653            Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1654                match udf.simplify(args, info)? {
1655                    ExprSimplifyResult::Original(args) => {
1656                        Transformed::no(Expr::ScalarFunction(ScalarFunction {
1657                            func: udf,
1658                            args,
1659                        }))
1660                    }
1661                    ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1662                }
1663            }
1664
1665            Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1666                ref func,
1667                ..
1668            }) => match (func.simplify(), expr) {
1669                (Some(simplify_function), Expr::AggregateFunction(af)) => {
1670                    Transformed::yes(simplify_function(af, info)?)
1671                }
1672                (_, expr) => Transformed::no(expr),
1673            },
1674
1675            Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1676                (Some(simplify_function), Expr::WindowFunction(wf)) => {
1677                    Transformed::yes(simplify_function(*wf, info)?)
1678                }
1679                (_, expr) => Transformed::no(expr),
1680            },
1681
1682            //
1683            // Rules for Between
1684            //
1685
1686            // a between 3 and 5  -->  a >= 3 AND a <=5
1687            // a not between 3 and 5  -->  a < 3 OR a > 5
1688            Expr::Between(between) => Transformed::yes(if between.negated {
1689                let l = *between.expr.clone();
1690                let r = *between.expr;
1691                or(l.lt(*between.low), r.gt(*between.high))
1692            } else {
1693                and(
1694                    between.expr.clone().gt_eq(*between.low),
1695                    between.expr.lt_eq(*between.high),
1696                )
1697            }),
1698
1699            //
1700            // Rules for regexes
1701            //
1702            Expr::BinaryExpr(BinaryExpr {
1703                left,
1704                op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1705                right,
1706            }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
1707
1708            // Rules for Like
1709            Expr::Like(like) => {
1710                // `\` is implicit escape, see https://github.com/apache/datafusion/issues/13291
1711                let escape_char = like.escape_char.unwrap_or('\\');
1712                match as_string_scalar(&like.pattern) {
1713                    Some((data_type, pattern_str)) => {
1714                        match pattern_str {
1715                            None => return Ok(Transformed::yes(lit_bool_null())),
1716                            Some(pattern_str) if pattern_str == "%" => {
1717                                // exp LIKE '%' is
1718                                //   - when exp is not NULL, it's true
1719                                //   - when exp is NULL, it's NULL
1720                                // exp NOT LIKE '%' is
1721                                //   - when exp is not NULL, it's false
1722                                //   - when exp is NULL, it's NULL
1723                                let result_for_non_null = lit(!like.negated);
1724                                Transformed::yes(if !info.nullable(&like.expr)? {
1725                                    result_for_non_null
1726                                } else {
1727                                    Expr::Case(Case {
1728                                        expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1729                                        when_then_expr: vec![(
1730                                            Box::new(lit(true)),
1731                                            Box::new(result_for_non_null),
1732                                        )],
1733                                        else_expr: None,
1734                                    })
1735                                })
1736                            }
1737                            Some(pattern_str)
1738                                if pattern_str.contains("%%")
1739                                    && !pattern_str.contains(escape_char) =>
1740                            {
1741                                // Repeated occurrences of wildcard are redundant so remove them
1742                                // exp LIKE '%%'  --> exp LIKE '%'
1743                                let simplified_pattern = Regex::new("%%+")
1744                                    .unwrap()
1745                                    .replace_all(pattern_str, "%")
1746                                    .to_string();
1747                                Transformed::yes(Expr::Like(Like {
1748                                    pattern: Box::new(to_string_scalar(
1749                                        data_type,
1750                                        Some(simplified_pattern),
1751                                    )),
1752                                    ..like
1753                                }))
1754                            }
1755                            Some(pattern_str)
1756                                if !like.case_insensitive
1757                                    && !pattern_str
1758                                        .contains(['%', '_', escape_char].as_ref()) =>
1759                            {
1760                                // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression
1761                                // TODO: handle escape characters
1762                                Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1763                                    left: like.expr.clone(),
1764                                    op: if like.negated { NotEq } else { Eq },
1765                                    right: like.pattern.clone(),
1766                                }))
1767                            }
1768
1769                            Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1770                        }
1771                    }
1772                    None => Transformed::no(Expr::Like(like)),
1773                }
1774            }
1775
1776            // a is not null/unknown --> true (if a is not nullable)
1777            Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1778                if !info.nullable(&expr)? =>
1779            {
1780                Transformed::yes(lit(true))
1781            }
1782
1783            // a is null/unknown --> false (if a is not nullable)
1784            Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1785                Transformed::yes(lit(false))
1786            }
1787
1788            // expr IN () --> false
1789            // expr NOT IN () --> true
1790            Expr::InList(InList {
1791                expr: _,
1792                list,
1793                negated,
1794            }) if list.is_empty() => Transformed::yes(lit(negated)),
1795
1796            // null in (x, y, z) --> null
1797            // null not in (x, y, z) --> null
1798            Expr::InList(InList {
1799                expr,
1800                list,
1801                negated: _,
1802            }) if is_null(expr.as_ref()) && !list.is_empty() => {
1803                Transformed::yes(lit_bool_null())
1804            }
1805
1806            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
1807            Expr::InList(InList {
1808                expr,
1809                mut list,
1810                negated,
1811            }) if list.len() == 1
1812                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1813            {
1814                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1815                    unreachable!()
1816                };
1817
1818                Transformed::yes(Expr::InSubquery(InSubquery::new(
1819                    expr, subquery, negated,
1820                )))
1821            }
1822
1823            // Combine multiple OR expressions into a single IN list expression if possible
1824            //
1825            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
1826            Expr::BinaryExpr(BinaryExpr {
1827                left,
1828                op: Or,
1829                right,
1830            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1831                let lhs = to_inlist(*left).unwrap();
1832                let rhs = to_inlist(*right).unwrap();
1833                let mut seen: HashSet<Expr> = HashSet::new();
1834                let list = lhs
1835                    .list
1836                    .into_iter()
1837                    .chain(rhs.list)
1838                    .filter(|e| seen.insert(e.to_owned()))
1839                    .collect::<Vec<_>>();
1840
1841                let merged_inlist = InList {
1842                    expr: lhs.expr,
1843                    list,
1844                    negated: false,
1845                };
1846
1847                Transformed::yes(Expr::InList(merged_inlist))
1848            }
1849
            // Simplify expressions that are guaranteed to be true or false to a literal boolean expression
1851            //
1852            // Rules:
1853            // If both expressions are `IN` or `NOT IN`, then we can apply intersection or union on both lists
1854            //   Intersection:
1855            //     1. `a in (1,2,3) AND a in (4,5) -> a in (), which is false`
1856            //     2. `a in (1,2,3) AND a in (2,3,4) -> a in (2,3)`
1857            //     3. `a not in (1,2,3) OR a not in (3,4,5,6) -> a not in (3)`
1858            //   Union:
            //     4. `a not in (1,2,3) AND a not in (4,5,6) -> a not in (1,2,3,4,5,6)`
1860            //     # This rule is handled by `or_in_list_simplifier.rs`
1861            //     5. `a in (1,2,3) OR a in (4,5,6) -> a in (1,2,3,4,5,6)`
1862            // If one of the expressions is `IN` and another one is `NOT IN`, then we apply exception on `In` expression
1863            //     6. `a in (1,2,3,4) AND a not in (1,2,3,4,5) -> a in (), which is false`
1864            //     7. `a not in (1,2,3,4) AND a in (1,2,3,4,5) -> a = 5`
1865            //     8. `a in (1,2,3,4) AND a not in (5,6,7,8) -> a in (1,2,3,4)`
1866            Expr::BinaryExpr(BinaryExpr {
1867                left,
1868                op: And,
1869                right,
1870            }) if are_inlist_and_eq_and_match_neg(
1871                left.as_ref(),
1872                right.as_ref(),
1873                false,
1874                false,
1875            ) =>
1876            {
1877                match (*left, *right) {
1878                    (Expr::InList(l1), Expr::InList(l2)) => {
1879                        return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1880                    }
1881                    // Matched previously once
1882                    _ => unreachable!(),
1883                }
1884            }
1885
1886            Expr::BinaryExpr(BinaryExpr {
1887                left,
1888                op: And,
1889                right,
1890            }) if are_inlist_and_eq_and_match_neg(
1891                left.as_ref(),
1892                right.as_ref(),
1893                true,
1894                true,
1895            ) =>
1896            {
1897                match (*left, *right) {
1898                    (Expr::InList(l1), Expr::InList(l2)) => {
1899                        return inlist_union(l1, l2, true).map(Transformed::yes);
1900                    }
1901                    // Matched previously once
1902                    _ => unreachable!(),
1903                }
1904            }
1905
1906            Expr::BinaryExpr(BinaryExpr {
1907                left,
1908                op: And,
1909                right,
1910            }) if are_inlist_and_eq_and_match_neg(
1911                left.as_ref(),
1912                right.as_ref(),
1913                false,
1914                true,
1915            ) =>
1916            {
1917                match (*left, *right) {
1918                    (Expr::InList(l1), Expr::InList(l2)) => {
1919                        return inlist_except(l1, &l2).map(Transformed::yes);
1920                    }
1921                    // Matched previously once
1922                    _ => unreachable!(),
1923                }
1924            }
1925
1926            Expr::BinaryExpr(BinaryExpr {
1927                left,
1928                op: And,
1929                right,
1930            }) if are_inlist_and_eq_and_match_neg(
1931                left.as_ref(),
1932                right.as_ref(),
1933                true,
1934                false,
1935            ) =>
1936            {
1937                match (*left, *right) {
1938                    (Expr::InList(l1), Expr::InList(l2)) => {
1939                        return inlist_except(l2, &l1).map(Transformed::yes);
1940                    }
1941                    // Matched previously once
1942                    _ => unreachable!(),
1943                }
1944            }
1945
1946            Expr::BinaryExpr(BinaryExpr {
1947                left,
1948                op: Or,
1949                right,
1950            }) if are_inlist_and_eq_and_match_neg(
1951                left.as_ref(),
1952                right.as_ref(),
1953                true,
1954                true,
1955            ) =>
1956            {
1957                match (*left, *right) {
1958                    (Expr::InList(l1), Expr::InList(l2)) => {
1959                        return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1960                    }
1961                    // Matched previously once
1962                    _ => unreachable!(),
1963                }
1964            }
1965
1966            // =======================================
1967            // unwrap_cast_in_comparison
1968            // =======================================
1969            //
1970            // For case:
1971            // try_cast/cast(expr as data_type) op literal
1972            Expr::BinaryExpr(BinaryExpr { left, op, right })
1973                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1974                    info, &left, op, &right,
1975                ) && op.supports_propagation() =>
1976            {
1977                unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1978            }
1979            // literal op try_cast/cast(expr as data_type)
1980            // -->
1981            // try_cast/cast(expr as data_type) op_swap literal
1982            Expr::BinaryExpr(BinaryExpr { left, op, right })
1983                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1984                    info, &right, op, &left,
1985                ) && op.supports_propagation()
1986                    && op.swap().is_some() =>
1987            {
1988                unwrap_cast_in_comparison_for_binary(
1989                    info,
1990                    *right,
1991                    *left,
1992                    op.swap().unwrap(),
1993                )?
1994            }
1995            // For case:
1996            // try_cast/cast(expr as left_type) in (expr1,expr2,expr3)
1997            Expr::InList(InList {
1998                expr: mut left,
1999                list,
2000                negated,
2001            }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
2002                info, &left, &list,
2003            ) =>
2004            {
2005                let (Expr::TryCast(TryCast {
2006                    expr: left_expr, ..
2007                })
2008                | Expr::Cast(Cast {
2009                    expr: left_expr, ..
2010                })) = left.as_mut()
2011                else {
2012                    return internal_err!("Expect cast expr, but got {:?}", left)?;
2013                };
2014
2015                let expr_type = info.get_data_type(left_expr)?;
2016                let right_exprs = list
2017                    .into_iter()
2018                    .map(|right| {
2019                        match right {
2020                            Expr::Literal(right_lit_value, _) => {
2021                                // if the right_lit_value can be casted to the type of internal_left_expr
2022                                // we need to unwrap the cast for cast/try_cast expr, and add cast to the literal
2023                                let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
2024                                    internal_err!(
2025                                        "Can't cast the list expr {:?} to type {}",
2026                                        right_lit_value, &expr_type
2027                                    )?
2028                                };
2029                                Ok(lit(value))
2030                            }
2031                            other_expr => internal_err!(
2032                                "Only support literal expr to optimize, but the expr is {:?}",
2033                                &other_expr
2034                            ),
2035                        }
2036                    })
2037                    .collect::<Result<Vec<_>>>()?;
2038
2039                Transformed::yes(Expr::InList(InList {
2040                    expr: std::mem::take(left_expr),
2041                    list: right_exprs,
2042                    negated,
2043                }))
2044            }
2045
2046            // no additional rewrites possible
2047            expr => Transformed::no(expr),
2048        })
2049    }
2050}
2051
2052fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
2053    match expr {
2054        Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
2055        Expr::Literal(ScalarValue::LargeUtf8(s), _) => Some((DataType::LargeUtf8, s)),
2056        Expr::Literal(ScalarValue::Utf8View(s), _) => Some((DataType::Utf8View, s)),
2057        _ => None,
2058    }
2059}
2060
2061fn to_string_scalar(data_type: DataType, value: Option<String>) -> Expr {
2062    match data_type {
2063        DataType::Utf8 => Expr::Literal(ScalarValue::Utf8(value), None),
2064        DataType::LargeUtf8 => Expr::Literal(ScalarValue::LargeUtf8(value), None),
2065        DataType::Utf8View => Expr::Literal(ScalarValue::Utf8View(value), None),
2066        _ => unreachable!(),
2067    }
2068}
2069
2070fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2071    let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2072    iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2073}
2074
2075// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2076fn are_inlist_and_eq_and_match_neg(
2077    left: &Expr,
2078    right: &Expr,
2079    is_left_neg: bool,
2080    is_right_neg: bool,
2081) -> bool {
2082    match (left, right) {
2083        (Expr::InList(l), Expr::InList(r)) => {
2084            l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2085        }
2086        _ => false,
2087    }
2088}
2089
2090// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2091fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2092    let left = as_inlist(left);
2093    let right = as_inlist(right);
2094    if let (Some(lhs), Some(rhs)) = (left, right) {
2095        matches!(lhs.expr.as_ref(), Expr::Column(_))
2096            && matches!(rhs.expr.as_ref(), Expr::Column(_))
2097            && lhs.expr == rhs.expr
2098            && !lhs.negated
2099            && !rhs.negated
2100    } else {
2101        false
2102    }
2103}
2104
2105/// Try to convert an expression to an in-list expression
2106fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2107    match expr {
2108        Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2109        Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2110            match (left.as_ref(), right.as_ref()) {
2111                (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2112                    expr: left.clone(),
2113                    list: vec![*right.clone()],
2114                    negated: false,
2115                })),
2116                (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2117                    expr: right.clone(),
2118                    list: vec![*left.clone()],
2119                    negated: false,
2120                })),
2121                _ => None,
2122            }
2123        }
2124        _ => None,
2125    }
2126}
2127
2128fn to_inlist(expr: Expr) -> Option<InList> {
2129    match expr {
2130        Expr::InList(inlist) => Some(inlist),
2131        Expr::BinaryExpr(BinaryExpr {
2132            left,
2133            op: Operator::Eq,
2134            right,
2135        }) => match (left.as_ref(), right.as_ref()) {
2136            (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2137                expr: left,
2138                list: vec![*right],
2139                negated: false,
2140            }),
2141            (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2142                expr: right,
2143                list: vec![*left],
2144                negated: false,
2145            }),
2146            _ => None,
2147        },
2148        _ => None,
2149    }
2150}
2151
2152/// Return the union of two inlist expressions
2153/// maintaining the order of the elements in the two lists
2154fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2155    // extend the list in l1 with the elements in l2 that are not already in l1
2156    let l1_items: HashSet<_> = l1.list.iter().collect();
2157
2158    // keep all l2 items that do not also appear in l1
2159    let keep_l2: Vec<_> = l2
2160        .list
2161        .into_iter()
2162        .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2163        .collect();
2164
2165    l1.list.extend(keep_l2);
2166    l1.negated = negated;
2167    Ok(Expr::InList(l1))
2168}
2169
2170/// Return the intersection of two inlist expressions
2171/// maintaining the order of the elements in the two lists
2172fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2173    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2174
2175    // remove all items from l1 that are not in l2
2176    l1.list.retain(|e| l2_items.contains(e));
2177
2178    // e in () is always false
2179    // e not in () is always true
2180    if l1.list.is_empty() {
2181        return Ok(lit(negated));
2182    }
2183    Ok(Expr::InList(l1))
2184}
2185
2186/// Return the all items in l1 that are not in l2
2187/// maintaining the order of the elements in the two lists
2188fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2189    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2190
2191    // keep only items from l1 that are not in l2
2192    l1.list.retain(|e| !l2_items.contains(e));
2193
2194    if l1.list.is_empty() {
2195        return Ok(lit(false));
2196    }
2197    Ok(Expr::InList(l1))
2198}
2199
2200/// Returns expression testing a boolean `expr` for being exactly `true` (not `false` or NULL).
2201fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result<Expr> {
2202    if !info.nullable(&expr)? {
2203        Ok(expr)
2204    } else {
2205        Ok(Expr::BinaryExpr(BinaryExpr {
2206            left: Box::new(expr),
2207            op: Operator::IsNotDistinctFrom,
2208            right: Box::new(lit(true)),
2209        }))
2210    }
2211}
2212
2213// A * 1 -> A
2214// A / 1 -> A
2215//
2216// Move this function body out of the large match branch avoid stack overflow
2217fn simplify_right_is_one_case<S: SimplifyInfo>(
2218    info: &S,
2219    left: Box<Expr>,
2220    op: &Operator,
2221    right: &Expr,
2222) -> Result<Transformed<Expr>> {
2223    // Check if resulting type would be different due to coercion
2224    let left_type = info.get_data_type(&left)?;
2225    let right_type = info.get_data_type(right)?;
2226    match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2227        Ok(result_type) => {
2228            // Only cast if the types differ
2229            if left_type != result_type {
2230                Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2231            } else {
2232                Ok(Transformed::yes(*left))
2233            }
2234        }
2235        Err(_) => Ok(Transformed::yes(*left)),
2236    }
2237}
2238
2239#[cfg(test)]
2240mod tests {
2241    use super::*;
2242    use crate::simplify_expressions::SimplifyContext;
2243    use crate::test::test_table_scan_with_name;
2244    use arrow::datatypes::FieldRef;
2245    use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema};
2246    use datafusion_expr::{
2247        expr::WindowFunction,
2248        function::{
2249            AccumulatorArgs, AggregateFunctionSimplification,
2250            WindowFunctionSimplification,
2251        },
2252        interval_arithmetic::Interval,
2253        *,
2254    };
2255    use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2256    use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2257    use datafusion_physical_expr::PhysicalExpr;
2258    use std::hash::Hash;
2259    use std::sync::LazyLock;
2260    use std::{
2261        collections::HashMap,
2262        ops::{BitAnd, BitOr, BitXor},
2263        sync::Arc,
2264    };
2265
2266    // ------------------------------
2267    // --- ExprSimplifier tests -----
2268    // ------------------------------
2269    #[test]
2270    fn api_basic() {
2271        let props = ExecutionProps::new();
2272        let simplifier =
2273            ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));
2274
2275        let expr = lit(1) + lit(2);
2276        let expected = lit(3);
2277        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2278    }
2279
2280    #[test]
2281    fn basic_coercion() {
2282        let schema = test_schema();
2283        let props = ExecutionProps::new();
2284        let simplifier = ExprSimplifier::new(
2285            SimplifyContext::new(&props).with_schema(Arc::clone(&schema)),
2286        );
2287
2288        // Note expr type is int32 (not int64)
2289        // (1i64 + 2i32) < i
2290        let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2291        // should fully simplify to 3 < i (though i has been coerced to i64)
2292        let expected = lit(3i64).lt(col("i"));
2293
2294        let expr = simplifier.coerce(expr, &schema).unwrap();
2295
2296        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2297    }
2298
2299    fn test_schema() -> DFSchemaRef {
2300        static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2301            Schema::new(vec![
2302                Field::new("i", DataType::Int64, false),
2303                Field::new("b", DataType::Boolean, true),
2304            ])
2305            .to_dfschema_ref()
2306            .unwrap()
2307        });
2308        Arc::clone(&TEST_SCHEMA)
2309    }
2310
    #[test]
    fn simplify_and_constant_prop() {
        let props = ExecutionProps::new();
        let simplifier =
            ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));

        // should be able to simplify to false
        // (i * (1 - 1)) > 0
        //
        // `1 - 1` is a constant sub-expression, and `i` is declared
        // non-nullable in `test_schema`; the whole predicate is expected to
        // collapse to the literal `false`.
        let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
        let expected = lit(false);
        assert_eq!(expected, simplifier.simplify(expr).unwrap());
    }
2323
2324    #[test]
2325    fn simplify_and_constant_prop_with_case() {
2326        let props = ExecutionProps::new();
2327        let simplifier =
2328            ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));
2329
2330        //   CASE
2331        //     WHEN i>5 AND false THEN i > 5
2332        //     WHEN i<5 AND true THEN i < 5
2333        //     ELSE false
2334        //   END
2335        //
2336        // Can be simplified to `i < 5`
2337        let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2338            .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2339            .otherwise(lit(false))
2340            .unwrap();
2341        let expected = col("i").lt(lit(5));
2342        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2343    }
2344
2345    // ------------------------------
2346    // --- Simplifier tests -----
2347    // ------------------------------
2348
2349    #[test]
2350    fn test_simplify_canonicalize() {
2351        {
2352            let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2353            let expected = col("c2").gt(lit(1));
2354            assert_eq!(simplify(expr), expected);
2355        }
2356        {
2357            let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2358            let expected = col("c2").gt(col("c1"));
2359            assert_eq!(simplify(expr), expected);
2360        }
2361        {
2362            let expr = col("c1")
2363                .eq(lit(1))
2364                .and(lit(1).eq(col("c1")))
2365                .and(col("c1").eq(lit(3)));
2366            let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2367            assert_eq!(simplify(expr), expected);
2368        }
2369        {
2370            let expr = col("c1")
2371                .eq(col("c2"))
2372                .and(col("c1").gt(lit(5)))
2373                .and(col("c2").eq(col("c1")));
2374            let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2375            assert_eq!(simplify(expr), expected);
2376        }
2377        {
2378            let expr = col("c1")
2379                .eq(lit(1))
2380                .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2381            let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2382            assert_eq!(simplify(expr), expected);
2383        }
2384        {
2385            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2386            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2387            assert_eq!(simplify(expr), expected);
2388        }
2389        {
2390            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2391            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2392            assert_eq!(simplify(expr), expected);
2393        }
2394        {
2395            let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2396            let expected = col("c2").lt(col("c1"));
2397            assert_eq!(simplify(expr), expected);
2398        }
2399    }
2400
2401    #[test]
2402    fn test_simplify_eq_not_self() {
2403        // `expr_a`: column `c2` is nullable, so `c2 = c2` simplifies to `c2 IS NOT NULL OR NULL`
2404        // This ensures the expression is only true when `c2` is not NULL, accounting for SQL's NULL semantics.
2405        let expr_a = col("c2").eq(col("c2"));
2406        let expected_a = col("c2").is_not_null().or(lit_bool_null());
2407
2408        // `expr_b`: column `c2_non_null` is explicitly non-nullable, so `c2_non_null = c2_non_null` is always true
2409        let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2410        let expected_b = lit(true);
2411
2412        assert_eq!(simplify(expr_a), expected_a);
2413        assert_eq!(simplify(expr_b), expected_b);
2414    }
2415
2416    #[test]
2417    fn test_simplify_or_true() {
2418        let expr_a = col("c2").or(lit(true));
2419        let expr_b = lit(true).or(col("c2"));
2420        let expected = lit(true);
2421
2422        assert_eq!(simplify(expr_a), expected);
2423        assert_eq!(simplify(expr_b), expected);
2424    }
2425
2426    #[test]
2427    fn test_simplify_or_false() {
2428        let expr_a = lit(false).or(col("c2"));
2429        let expr_b = col("c2").or(lit(false));
2430        let expected = col("c2");
2431
2432        assert_eq!(simplify(expr_a), expected);
2433        assert_eq!(simplify(expr_b), expected);
2434    }
2435
2436    #[test]
2437    fn test_simplify_or_same() {
2438        let expr = col("c2").or(col("c2"));
2439        let expected = col("c2");
2440
2441        assert_eq!(simplify(expr), expected);
2442    }
2443
2444    #[test]
2445    fn test_simplify_or_not_self() {
2446        // A OR !A if A is not nullable --> true
2447        // !A OR A if A is not nullable --> true
2448        let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2449        let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2450        let expected = lit(true);
2451
2452        assert_eq!(simplify(expr_a), expected);
2453        assert_eq!(simplify(expr_b), expected);
2454    }
2455
2456    #[test]
2457    fn test_simplify_and_false() {
2458        let expr_a = lit(false).and(col("c2"));
2459        let expr_b = col("c2").and(lit(false));
2460        let expected = lit(false);
2461
2462        assert_eq!(simplify(expr_a), expected);
2463        assert_eq!(simplify(expr_b), expected);
2464    }
2465
2466    #[test]
2467    fn test_simplify_and_same() {
2468        let expr = col("c2").and(col("c2"));
2469        let expected = col("c2");
2470
2471        assert_eq!(simplify(expr), expected);
2472    }
2473
2474    #[test]
2475    fn test_simplify_and_true() {
2476        let expr_a = lit(true).and(col("c2"));
2477        let expr_b = col("c2").and(lit(true));
2478        let expected = col("c2");
2479
2480        assert_eq!(simplify(expr_a), expected);
2481        assert_eq!(simplify(expr_b), expected);
2482    }
2483
2484    #[test]
2485    fn test_simplify_and_not_self() {
2486        // A AND !A if A is not nullable --> false
2487        // !A AND A if A is not nullable --> false
2488        let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2489        let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2490        let expected = lit(false);
2491
2492        assert_eq!(simplify(expr_a), expected);
2493        assert_eq!(simplify(expr_b), expected);
2494    }
2495
2496    #[test]
2497    fn test_simplify_multiply_by_one() {
2498        let expr_a = col("c2") * lit(1);
2499        let expr_b = lit(1) * col("c2");
2500        let expected = col("c2");
2501
2502        assert_eq!(simplify(expr_a), expected);
2503        assert_eq!(simplify(expr_b), expected);
2504
2505        let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2506        assert_eq!(simplify(expr), expected);
2507
2508        let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2509        assert_eq!(simplify(expr), expected);
2510    }
2511
2512    #[test]
2513    fn test_simplify_multiply_by_null() {
2514        let null = lit(ScalarValue::Int64(None));
2515        // A * null --> null
2516        {
2517            let expr = col("c3") * null.clone();
2518            assert_eq!(simplify(expr), null);
2519        }
2520        // null * A --> null
2521        {
2522            let expr = null.clone() * col("c3");
2523            assert_eq!(simplify(expr), null);
2524        }
2525    }
2526
    #[test]
    fn test_simplify_multiply_by_zero() {
        // cannot optimize A * 0 (0 * A) if A is nullable:
        // the product is NULL (not 0) whenever A is NULL
        {
            let expr_a = col("c2") * lit(0);
            let expr_b = lit(0) * col("c2");

            assert_eq!(simplify(expr_a.clone()), expr_a);
            assert_eq!(simplify(expr_b.clone()), expr_b);
        }
        // 0 * A --> 0 if A is not nullable
        {
            let expr = lit(0) * col("c2_non_null");
            assert_eq!(simplify(expr), lit(0));
        }
        // A * 0 --> 0 if A is not nullable
        {
            let expr = col("c2_non_null") * lit(0);
            assert_eq!(simplify(expr), lit(0));
        }
        // A * Decimal128(0) --> 0 if A is not nullable
        // (checked with the zero on either side; the result keeps the
        // literal's precision and scale)
        {
            let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
            assert_eq!(
                simplify(expr),
                lit(ScalarValue::Decimal128(Some(0), 31, 10))
            );
            let expr = binary_expr(
                lit(ScalarValue::Decimal128(Some(0), 31, 10)),
                Operator::Multiply,
                col("c2_non_null"),
            );
            assert_eq!(
                simplify(expr),
                lit(ScalarValue::Decimal128(Some(0), 31, 10))
            );
        }
    }
2565
2566    #[test]
2567    fn test_simplify_divide_by_one() {
2568        let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2569        let expected = col("c2");
2570        assert_eq!(simplify(expr), expected);
2571        let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2572        assert_eq!(simplify(expr), expected);
2573    }
2574
2575    #[test]
2576    fn test_simplify_divide_null() {
2577        // A / null --> null
2578        let null = lit(ScalarValue::Int64(None));
2579        {
2580            let expr = col("c3") / null.clone();
2581            assert_eq!(simplify(expr), null);
2582        }
2583        // null / A --> null
2584        {
2585            let expr = null.clone() / col("c3");
2586            assert_eq!(simplify(expr), null);
2587        }
2588    }
2589
2590    #[test]
2591    fn test_simplify_divide_by_same() {
2592        let expr = col("c2") / col("c2");
2593        // if c2 is null, c2 / c2 = null, so can't simplify
2594        let expected = expr.clone();
2595
2596        assert_eq!(simplify(expr), expected);
2597    }
2598
2599    #[test]
2600    fn test_simplify_modulo_by_null() {
2601        let null = lit(ScalarValue::Int64(None));
2602        // A % null --> null
2603        {
2604            let expr = col("c3") % null.clone();
2605            assert_eq!(simplify(expr), null);
2606        }
2607        // null % A --> null
2608        {
2609            let expr = null.clone() % col("c3");
2610            assert_eq!(simplify(expr), null);
2611        }
2612    }
2613
2614    #[test]
2615    fn test_simplify_modulo_by_one() {
2616        let expr = col("c2") % lit(1);
2617        // if c2 is null, c2 % 1 = null, so can't simplify
2618        let expected = expr.clone();
2619
2620        assert_eq!(simplify(expr), expected);
2621    }
2622
2623    #[test]
2624    fn test_simplify_divide_zero_by_zero() {
2625        // because divide by 0 maybe occur in short-circuit expression
2626        // so we should not simplify this, and throw error in runtime
2627        let expr = lit(0) / lit(0);
2628        let expected = expr.clone();
2629
2630        assert_eq!(simplify(expr), expected);
2631    }
2632
2633    #[test]
2634    fn test_simplify_divide_by_zero() {
2635        // because divide by 0 maybe occur in short-circuit expression
2636        // so we should not simplify this, and throw error in runtime
2637        let expr = col("c2_non_null") / lit(0);
2638        let expected = expr.clone();
2639
2640        assert_eq!(simplify(expr), expected);
2641    }
2642
2643    #[test]
2644    fn test_simplify_modulo_by_one_non_null() {
2645        let expr = col("c3_non_null") % lit(1);
2646        let expected = lit(0_i64);
2647        assert_eq!(simplify(expr), expected);
2648        let expr =
2649            col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2650        assert_eq!(simplify(expr), expected);
2651    }
2652
2653    #[test]
2654    fn test_simplify_bitwise_xor_by_null() {
2655        let null = lit(ScalarValue::Int64(None));
2656        // A ^ null --> null
2657        {
2658            let expr = col("c3") ^ null.clone();
2659            assert_eq!(simplify(expr), null);
2660        }
2661        // null ^ A --> null
2662        {
2663            let expr = null.clone() ^ col("c3");
2664            assert_eq!(simplify(expr), null);
2665        }
2666    }
2667
2668    #[test]
2669    fn test_simplify_bitwise_shift_right_by_null() {
2670        let null = lit(ScalarValue::Int64(None));
2671        // A >> null --> null
2672        {
2673            let expr = col("c3") >> null.clone();
2674            assert_eq!(simplify(expr), null);
2675        }
2676        // null >> A --> null
2677        {
2678            let expr = null.clone() >> col("c3");
2679            assert_eq!(simplify(expr), null);
2680        }
2681    }
2682
2683    #[test]
2684    fn test_simplify_bitwise_shift_left_by_null() {
2685        let null = lit(ScalarValue::Int64(None));
2686        // A << null --> null
2687        {
2688            let expr = col("c3") << null.clone();
2689            assert_eq!(simplify(expr), null);
2690        }
2691        // null << A --> null
2692        {
2693            let expr = null.clone() << col("c3");
2694            assert_eq!(simplify(expr), null);
2695        }
2696    }
2697
2698    #[test]
2699    fn test_simplify_bitwise_and_by_zero() {
2700        // A & 0 --> 0
2701        {
2702            let expr = col("c2_non_null") & lit(0);
2703            assert_eq!(simplify(expr), lit(0));
2704        }
2705        // 0 & A --> 0
2706        {
2707            let expr = lit(0) & col("c2_non_null");
2708            assert_eq!(simplify(expr), lit(0));
2709        }
2710    }
2711
2712    #[test]
2713    fn test_simplify_bitwise_or_by_zero() {
2714        // A | 0 --> A
2715        {
2716            let expr = col("c2_non_null") | lit(0);
2717            assert_eq!(simplify(expr), col("c2_non_null"));
2718        }
2719        // 0 | A --> A
2720        {
2721            let expr = lit(0) | col("c2_non_null");
2722            assert_eq!(simplify(expr), col("c2_non_null"));
2723        }
2724    }
2725
2726    #[test]
2727    fn test_simplify_bitwise_xor_by_zero() {
2728        // A ^ 0 --> A
2729        {
2730            let expr = col("c2_non_null") ^ lit(0);
2731            assert_eq!(simplify(expr), col("c2_non_null"));
2732        }
2733        // 0 ^ A --> A
2734        {
2735            let expr = lit(0) ^ col("c2_non_null");
2736            assert_eq!(simplify(expr), col("c2_non_null"));
2737        }
2738    }
2739
2740    #[test]
2741    fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2742        // A >> 0 --> A
2743        {
2744            let expr = col("c2_non_null") >> lit(0);
2745            assert_eq!(simplify(expr), col("c2_non_null"));
2746        }
2747    }
2748
2749    #[test]
2750    fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2751        // A << 0 --> A
2752        {
2753            let expr = col("c2_non_null") << lit(0);
2754            assert_eq!(simplify(expr), col("c2_non_null"));
2755        }
2756    }
2757
2758    #[test]
2759    fn test_simplify_bitwise_and_by_null() {
2760        let null = Expr::Literal(ScalarValue::Int64(None), None);
2761        // A & null --> null
2762        {
2763            let expr = col("c3") & null.clone();
2764            assert_eq!(simplify(expr), null);
2765        }
2766        // null & A --> null
2767        {
2768            let expr = null.clone() & col("c3");
2769            assert_eq!(simplify(expr), null);
2770        }
2771    }
2772
2773    #[test]
2774    fn test_simplify_composed_bitwise_and() {
2775        // ((c2 > 5) & (c1 < 6)) & (c2 > 5) --> (c2 > 5) & (c1 < 6)
2776
2777        let expr = bitwise_and(
2778            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2779            col("c2").gt(lit(5)),
2780        );
2781        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2782
2783        assert_eq!(simplify(expr), expected);
2784
2785        // (c2 > 5) & ((c2 > 5) & (c1 < 6)) --> (c2 > 5) & (c1 < 6)
2786
2787        let expr = bitwise_and(
2788            col("c2").gt(lit(5)),
2789            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2790        );
2791        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2792        assert_eq!(simplify(expr), expected);
2793    }
2794
2795    #[test]
2796    fn test_simplify_composed_bitwise_or() {
2797        // ((c2 > 5) | (c1 < 6)) | (c2 > 5) --> (c2 > 5) | (c1 < 6)
2798
2799        let expr = bitwise_or(
2800            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2801            col("c2").gt(lit(5)),
2802        );
2803        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2804
2805        assert_eq!(simplify(expr), expected);
2806
2807        // (c2 > 5) | ((c2 > 5) | (c1 < 6)) --> (c2 > 5) | (c1 < 6)
2808
2809        let expr = bitwise_or(
2810            col("c2").gt(lit(5)),
2811            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2812        );
2813        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2814
2815        assert_eq!(simplify(expr), expected);
2816    }
2817
2818    #[test]
2819    fn test_simplify_composed_bitwise_xor() {
2820        // with an even number of the column "c2"
2821        // c2 ^ ((c2 ^ (c2 | c1)) ^ (c1 & c2)) --> (c2 | c1) ^ (c1 & c2)
2822
2823        let expr = bitwise_xor(
2824            col("c2"),
2825            bitwise_xor(
2826                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2827                bitwise_and(col("c1"), col("c2")),
2828            ),
2829        );
2830
2831        let expected = bitwise_xor(
2832            bitwise_or(col("c2"), col("c1")),
2833            bitwise_and(col("c1"), col("c2")),
2834        );
2835
2836        assert_eq!(simplify(expr), expected);
2837
2838        // with an odd number of the column "c2"
2839        // c2 ^ (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) --> c2 ^ ((c2 | c1) ^ (c1 & c2))
2840
2841        let expr = bitwise_xor(
2842            col("c2"),
2843            bitwise_xor(
2844                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2845                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2846            ),
2847        );
2848
2849        let expected = bitwise_xor(
2850            col("c2"),
2851            bitwise_xor(
2852                bitwise_or(col("c2"), col("c1")),
2853                bitwise_and(col("c1"), col("c2")),
2854            ),
2855        );
2856
2857        assert_eq!(simplify(expr), expected);
2858
2859        // with an even number of the column "c2"
2860        // ((c2 ^ (c2 | c1)) ^ (c1 & c2)) ^ c2 --> (c2 | c1) ^ (c1 & c2)
2861
2862        let expr = bitwise_xor(
2863            bitwise_xor(
2864                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2865                bitwise_and(col("c1"), col("c2")),
2866            ),
2867            col("c2"),
2868        );
2869
2870        let expected = bitwise_xor(
2871            bitwise_or(col("c2"), col("c1")),
2872            bitwise_and(col("c1"), col("c2")),
2873        );
2874
2875        assert_eq!(simplify(expr), expected);
2876
2877        // with an odd number of the column "c2"
2878        // (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) ^ c2 --> ((c2 | c1) ^ (c1 & c2)) ^ c2
2879
2880        let expr = bitwise_xor(
2881            bitwise_xor(
2882                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2883                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2884            ),
2885            col("c2"),
2886        );
2887
2888        let expected = bitwise_xor(
2889            bitwise_xor(
2890                bitwise_or(col("c2"), col("c1")),
2891                bitwise_and(col("c1"), col("c2")),
2892            ),
2893            col("c2"),
2894        );
2895
2896        assert_eq!(simplify(expr), expected);
2897    }
2898
2899    #[test]
2900    fn test_simplify_negated_bitwise_and() {
2901        // !c4 & c4 --> 0
2902        let expr = (-col("c4_non_null")) & col("c4_non_null");
2903        let expected = lit(0u32);
2904
2905        assert_eq!(simplify(expr), expected);
2906        // c4 & !c4 --> 0
2907        let expr = col("c4_non_null") & (-col("c4_non_null"));
2908        let expected = lit(0u32);
2909
2910        assert_eq!(simplify(expr), expected);
2911
2912        // !c3 & c3 --> 0
2913        let expr = (-col("c3_non_null")) & col("c3_non_null");
2914        let expected = lit(0i64);
2915
2916        assert_eq!(simplify(expr), expected);
2917        // c3 & !c3 --> 0
2918        let expr = col("c3_non_null") & (-col("c3_non_null"));
2919        let expected = lit(0i64);
2920
2921        assert_eq!(simplify(expr), expected);
2922    }
2923
2924    #[test]
2925    fn test_simplify_negated_bitwise_or() {
2926        // !c4 | c4 --> -1
2927        let expr = (-col("c4_non_null")) | col("c4_non_null");
2928        let expected = lit(-1i32);
2929
2930        assert_eq!(simplify(expr), expected);
2931
2932        // c4 | !c4 --> -1
2933        let expr = col("c4_non_null") | (-col("c4_non_null"));
2934        let expected = lit(-1i32);
2935
2936        assert_eq!(simplify(expr), expected);
2937
2938        // !c3 | c3 --> -1
2939        let expr = (-col("c3_non_null")) | col("c3_non_null");
2940        let expected = lit(-1i64);
2941
2942        assert_eq!(simplify(expr), expected);
2943
2944        // c3 | !c3 --> -1
2945        let expr = col("c3_non_null") | (-col("c3_non_null"));
2946        let expected = lit(-1i64);
2947
2948        assert_eq!(simplify(expr), expected);
2949    }
2950
2951    #[test]
2952    fn test_simplify_negated_bitwise_xor() {
2953        // !c4 ^ c4 --> -1
2954        let expr = (-col("c4_non_null")) ^ col("c4_non_null");
2955        let expected = lit(-1i32);
2956
2957        assert_eq!(simplify(expr), expected);
2958
2959        // c4 ^ !c4 --> -1
2960        let expr = col("c4_non_null") ^ (-col("c4_non_null"));
2961        let expected = lit(-1i32);
2962
2963        assert_eq!(simplify(expr), expected);
2964
2965        // !c3 ^ c3 --> -1
2966        let expr = (-col("c3_non_null")) ^ col("c3_non_null");
2967        let expected = lit(-1i64);
2968
2969        assert_eq!(simplify(expr), expected);
2970
2971        // c3 ^ !c3 --> -1
2972        let expr = col("c3_non_null") ^ (-col("c3_non_null"));
2973        let expected = lit(-1i64);
2974
2975        assert_eq!(simplify(expr), expected);
2976    }
2977
2978    #[test]
2979    fn test_simplify_bitwise_and_or() {
2980        // (c2 < 3) & ((c2 < 3) | c1) -> (c2 < 3)
2981        let expr = bitwise_and(
2982            col("c2_non_null").lt(lit(3)),
2983            bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
2984        );
2985        let expected = col("c2_non_null").lt(lit(3));
2986
2987        assert_eq!(simplify(expr), expected);
2988    }
2989
2990    #[test]
2991    fn test_simplify_bitwise_or_and() {
2992        // (c2 < 3) | ((c2 < 3) & c1) -> (c2 < 3)
2993        let expr = bitwise_or(
2994            col("c2_non_null").lt(lit(3)),
2995            bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
2996        );
2997        let expected = col("c2_non_null").lt(lit(3));
2998
2999        assert_eq!(simplify(expr), expected);
3000    }
3001
3002    #[test]
3003    fn test_simplify_simple_bitwise_and() {
3004        // (c2 > 5) & (c2 > 5) -> (c2 > 5)
3005        let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
3006        let expected = col("c2").gt(lit(5));
3007
3008        assert_eq!(simplify(expr), expected);
3009    }
3010
3011    #[test]
3012    fn test_simplify_simple_bitwise_or() {
3013        // (c2 > 5) | (c2 > 5) -> (c2 > 5)
3014        let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
3015        let expected = col("c2").gt(lit(5));
3016
3017        assert_eq!(simplify(expr), expected);
3018    }
3019
3020    #[test]
3021    fn test_simplify_simple_bitwise_xor() {
3022        // c4 ^ c4 -> 0
3023        let expr = (col("c4")).bitxor(col("c4"));
3024        let expected = lit(0u32);
3025
3026        assert_eq!(simplify(expr), expected);
3027
3028        // c3 ^ c3 -> 0
3029        let expr = col("c3").bitxor(col("c3"));
3030        let expected = lit(0i64);
3031
3032        assert_eq!(simplify(expr), expected);
3033    }
3034
3035    #[test]
3036    fn test_simplify_modulo_by_zero_non_null() {
3037        // because modulo by 0 maybe occur in short-circuit expression
3038        // so we should not simplify this, and throw error in runtime.
3039        let expr = col("c2_non_null") % lit(0);
3040        let expected = expr.clone();
3041
3042        assert_eq!(simplify(expr), expected);
3043    }
3044
3045    #[test]
3046    fn test_simplify_simple_and() {
3047        // (c2 > 5) AND (c2 > 5) -> (c2 > 5)
3048        let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
3049        let expected = col("c2").gt(lit(5));
3050
3051        assert_eq!(simplify(expr), expected);
3052    }
3053
3054    #[test]
3055    fn test_simplify_composed_and() {
3056        // ((c2 > 5) AND (c1 < 6)) AND (c2 > 5)
3057        let expr = and(
3058            and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
3059            col("c2").gt(lit(5)),
3060        );
3061        let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
3062
3063        assert_eq!(simplify(expr), expected);
3064    }
3065
3066    #[test]
3067    fn test_simplify_negated_and() {
3068        // (c2 > 5) AND !(c2 > 5) --> (c2 > 5) AND (c2 <= 5)
3069        let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
3070        let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
3071
3072        assert_eq!(simplify(expr), expected);
3073    }
3074
3075    #[test]
3076    fn test_simplify_or_and() {
3077        let l = col("c2").gt(lit(5));
3078        let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3079
3080        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
3081        let expr = or(l.clone(), r.clone());
3082
3083        let expected = l.clone();
3084        assert_eq!(simplify(expr), expected);
3085
3086        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
3087        let expr = or(r, l);
3088        assert_eq!(simplify(expr), expected);
3089    }
3090
3091    #[test]
3092    fn test_simplify_or_and_non_null() {
3093        let l = col("c2_non_null").gt(lit(5));
3094        let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3095
3096        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5)) --> c2 > 5
3097        let expr = or(l.clone(), r.clone());
3098
3099        // This is only true if `c1 < 6` is not nullable / can not be null.
3100        let expected = col("c2_non_null").gt(lit(5));
3101
3102        assert_eq!(simplify(expr), expected);
3103
3104        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5) --> c2 > 5
3105        let expr = or(l, r);
3106
3107        assert_eq!(simplify(expr), expected);
3108    }
3109
3110    #[test]
3111    fn test_simplify_and_or() {
3112        let l = col("c2").gt(lit(5));
3113        let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3114
3115        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3116        let expr = and(l.clone(), r.clone());
3117
3118        let expected = l.clone();
3119        assert_eq!(simplify(expr), expected);
3120
3121        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3122        let expr = and(r, l);
3123        assert_eq!(simplify(expr), expected);
3124    }
3125
3126    #[test]
3127    fn test_simplify_and_or_non_null() {
3128        let l = col("c2_non_null").gt(lit(5));
3129        let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3130
3131        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3132        let expr = and(l.clone(), r.clone());
3133
3134        // This is only true if `c1 < 6` is not nullable / can not be null.
3135        let expected = col("c2_non_null").gt(lit(5));
3136
3137        assert_eq!(simplify(expr), expected);
3138
3139        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3140        let expr = and(l, r);
3141
3142        assert_eq!(simplify(expr), expected);
3143    }
3144
3145    #[test]
3146    fn test_simplify_by_de_morgan_laws() {
3147        // Laws with logical operations
3148        // !(c3 AND c4) --> !c3 OR !c4
3149        let expr = and(col("c3"), col("c4")).not();
3150        let expected = or(col("c3").not(), col("c4").not());
3151        assert_eq!(simplify(expr), expected);
3152        // !(c3 OR c4) --> !c3 AND !c4
3153        let expr = or(col("c3"), col("c4")).not();
3154        let expected = and(col("c3").not(), col("c4").not());
3155        assert_eq!(simplify(expr), expected);
3156        // !(!c3) --> c3
3157        let expr = col("c3").not().not();
3158        let expected = col("c3");
3159        assert_eq!(simplify(expr), expected);
3160
3161        // Laws with bitwise operations
3162        // !(c3 & c4) --> !c3 | !c4
3163        let expr = -bitwise_and(col("c3"), col("c4"));
3164        let expected = bitwise_or(-col("c3"), -col("c4"));
3165        assert_eq!(simplify(expr), expected);
3166        // !(c3 | c4) --> !c3 & !c4
3167        let expr = -bitwise_or(col("c3"), col("c4"));
3168        let expected = bitwise_and(-col("c3"), -col("c4"));
3169        assert_eq!(simplify(expr), expected);
3170        // !(!c3) --> c3
3171        let expr = -(-col("c3"));
3172        let expected = col("c3");
3173        assert_eq!(simplify(expr), expected);
3174    }
3175
3176    #[test]
3177    fn test_simplify_null_and_false() {
3178        let expr = and(lit_bool_null(), lit(false));
3179        let expr_eq = lit(false);
3180
3181        assert_eq!(simplify(expr), expr_eq);
3182    }
3183
3184    #[test]
3185    fn test_simplify_divide_null_by_null() {
3186        let null = lit(ScalarValue::Int32(None));
3187        let expr_plus = null.clone() / null.clone();
3188        let expr_eq = null;
3189
3190        assert_eq!(simplify(expr_plus), expr_eq);
3191    }
3192
3193    #[test]
3194    fn test_simplify_simplify_arithmetic_expr() {
3195        let expr_plus = lit(1) + lit(1);
3196
3197        assert_eq!(simplify(expr_plus), lit(2));
3198    }
3199
3200    #[test]
3201    fn test_simplify_simplify_eq_expr() {
3202        let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
3203
3204        assert_eq!(simplify(expr_eq), lit(true));
3205    }
3206
3207    #[test]
3208    fn test_simplify_regex() {
3209        // malformed regex
3210        assert_contains!(
3211            try_simplify(regex_match(col("c1"), lit("foo{")))
3212                .unwrap_err()
3213                .to_string(),
3214            "regex parse error"
3215        );
3216
3217        // unsupported cases
3218        assert_no_change(regex_match(col("c1"), lit("foo.*")));
3219        assert_no_change(regex_match(col("c1"), lit("(foo)")));
3220        assert_no_change(regex_match(col("c1"), lit("%")));
3221        assert_no_change(regex_match(col("c1"), lit("_")));
3222        assert_no_change(regex_match(col("c1"), lit("f%o")));
3223        assert_no_change(regex_match(col("c1"), lit("^f%o")));
3224        assert_no_change(regex_match(col("c1"), lit("f_o")));
3225
3226        // empty cases
3227        assert_change(
3228            regex_match(col("c1"), lit("")),
3229            if_not_null(col("c1"), true),
3230        );
3231        assert_change(
3232            regex_not_match(col("c1"), lit("")),
3233            if_not_null(col("c1"), false),
3234        );
3235        assert_change(
3236            regex_imatch(col("c1"), lit("")),
3237            if_not_null(col("c1"), true),
3238        );
3239        assert_change(
3240            regex_not_imatch(col("c1"), lit("")),
3241            if_not_null(col("c1"), false),
3242        );
3243
3244        // single character
3245        assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3246
3247        // single word
3248        assert_change(
3249            regex_match(col("c1"), lit("foo")),
3250            col("c1").like(lit("%foo%")),
3251        );
3252
3253        // regular expressions that match an exact literal
3254        assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3255        assert_change(
3256            regex_not_match(col("c1"), lit("^$")),
3257            col("c1").not_eq(lit("")),
3258        );
3259        assert_change(
3260            regex_match(col("c1"), lit("^foo$")),
3261            col("c1").eq(lit("foo")),
3262        );
3263        assert_change(
3264            regex_not_match(col("c1"), lit("^foo$")),
3265            col("c1").not_eq(lit("foo")),
3266        );
3267
3268        // regular expressions that match exact captured literals
3269        assert_change(
3270            regex_match(col("c1"), lit("^(foo|bar)$")),
3271            col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3272        );
3273        assert_change(
3274            regex_not_match(col("c1"), lit("^(foo|bar)$")),
3275            col("c1")
3276                .not_eq(lit("foo"))
3277                .and(col("c1").not_eq(lit("bar"))),
3278        );
3279        assert_change(
3280            regex_match(col("c1"), lit("^(foo)$")),
3281            col("c1").eq(lit("foo")),
3282        );
3283        assert_change(
3284            regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3285            ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3286                .or(col("c1").eq(lit("baz"))),
3287        );
3288        assert_change(
3289            regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3290            col("c1")
3291                .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3292        );
3293        assert_change(
3294            regex_match(col("c1"), lit("^(fo_o)$")),
3295            col("c1").eq(lit("fo_o")),
3296        );
3297        assert_change(
3298            regex_match(col("c1"), lit("^(fo_o)$")),
3299            col("c1").eq(lit("fo_o")),
3300        );
3301        assert_change(
3302            regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3303            col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3304        );
3305        assert_change(
3306            regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3307            col("c1")
3308                .not_eq(lit("fo_o"))
3309                .and(col("c1").not_eq(lit("ba_r"))),
3310        );
3311        assert_change(
3312            regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3313            ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3314                .or(col("c1").eq(lit("ba_z"))),
3315        );
3316        assert_change(
3317            regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3318            col("c1").in_list(
3319                vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3320                false,
3321            ),
3322        );
3323
3324        // regular expressions that mismatch captured literals
3325        assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3326        assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3327        assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3328        assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3329        assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3330        assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3331        assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3332        assert_no_change(regex_match(col("c1"), lit("^")));
3333        assert_no_change(regex_match(col("c1"), lit("$")));
3334        assert_no_change(regex_match(col("c1"), lit("$^")));
3335        assert_no_change(regex_match(col("c1"), lit("$foo^")));
3336
3337        // regular expressions that match a partial literal
3338        assert_change(
3339            regex_match(col("c1"), lit("^foo")),
3340            col("c1").like(lit("foo%")),
3341        );
3342        assert_change(
3343            regex_match(col("c1"), lit("foo$")),
3344            col("c1").like(lit("%foo")),
3345        );
3346        assert_change(
3347            regex_match(col("c1"), lit("^foo|bar$")),
3348            col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3349        );
3350
3351        // OR-chain
3352        assert_change(
3353            regex_match(col("c1"), lit("foo|bar|baz")),
3354            col("c1")
3355                .like(lit("%foo%"))
3356                .or(col("c1").like(lit("%bar%")))
3357                .or(col("c1").like(lit("%baz%"))),
3358        );
3359        assert_change(
3360            regex_match(col("c1"), lit("foo|x|baz")),
3361            col("c1")
3362                .like(lit("%foo%"))
3363                .or(col("c1").like(lit("%x%")))
3364                .or(col("c1").like(lit("%baz%"))),
3365        );
3366        assert_change(
3367            regex_not_match(col("c1"), lit("foo|bar|baz")),
3368            col("c1")
3369                .not_like(lit("%foo%"))
3370                .and(col("c1").not_like(lit("%bar%")))
3371                .and(col("c1").not_like(lit("%baz%"))),
3372        );
3373        // both anchored expressions (translated to equality) and unanchored
3374        assert_change(
3375            regex_match(col("c1"), lit("foo|^x$|baz")),
3376            col("c1")
3377                .like(lit("%foo%"))
3378                .or(col("c1").eq(lit("x")))
3379                .or(col("c1").like(lit("%baz%"))),
3380        );
3381        assert_change(
3382            regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3383            col("c1")
3384                .not_like(lit("%foo%"))
3385                .and(col("c1").not_eq(lit("bar")))
3386                .and(col("c1").not_like(lit("%baz%"))),
3387        );
3388        // Too many patterns (MAX_REGEX_ALTERNATIONS_EXPANSION)
3389        assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3390    }
3391
3392    #[track_caller]
3393    fn assert_no_change(expr: Expr) {
3394        let optimized = simplify(expr.clone());
3395        assert_eq!(expr, optimized);
3396    }
3397
3398    #[track_caller]
3399    fn assert_change(expr: Expr, expected: Expr) {
3400        let optimized = simplify(expr);
3401        assert_eq!(optimized, expected);
3402    }
3403
3404    fn regex_match(left: Expr, right: Expr) -> Expr {
3405        Expr::BinaryExpr(BinaryExpr {
3406            left: Box::new(left),
3407            op: Operator::RegexMatch,
3408            right: Box::new(right),
3409        })
3410    }
3411
3412    fn regex_not_match(left: Expr, right: Expr) -> Expr {
3413        Expr::BinaryExpr(BinaryExpr {
3414            left: Box::new(left),
3415            op: Operator::RegexNotMatch,
3416            right: Box::new(right),
3417        })
3418    }
3419
3420    fn regex_imatch(left: Expr, right: Expr) -> Expr {
3421        Expr::BinaryExpr(BinaryExpr {
3422            left: Box::new(left),
3423            op: Operator::RegexIMatch,
3424            right: Box::new(right),
3425        })
3426    }
3427
3428    fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3429        Expr::BinaryExpr(BinaryExpr {
3430            left: Box::new(left),
3431            op: Operator::RegexNotIMatch,
3432            right: Box::new(right),
3433        })
3434    }
3435
3436    // ------------------------------
3437    // ----- Simplifier tests -------
3438    // ------------------------------
3439
3440    fn try_simplify(expr: Expr) -> Result<Expr> {
3441        let schema = expr_test_schema();
3442        let execution_props = ExecutionProps::new();
3443        let simplifier = ExprSimplifier::new(
3444            SimplifyContext::new(&execution_props).with_schema(schema),
3445        );
3446        simplifier.simplify(expr)
3447    }
3448
3449    fn coerce(expr: Expr) -> Expr {
3450        let schema = expr_test_schema();
3451        let execution_props = ExecutionProps::new();
3452        let simplifier = ExprSimplifier::new(
3453            SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)),
3454        );
3455        simplifier.coerce(expr, schema.as_ref()).unwrap()
3456    }
3457
3458    fn simplify(expr: Expr) -> Expr {
3459        try_simplify(expr).unwrap()
3460    }
3461
3462    fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3463        let schema = expr_test_schema();
3464        let execution_props = ExecutionProps::new();
3465        let simplifier = ExprSimplifier::new(
3466            SimplifyContext::new(&execution_props).with_schema(schema),
3467        );
3468        let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3469        Ok((expr.data, count))
3470    }
3471
3472    fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3473        try_simplify_with_cycle_count(expr).unwrap()
3474    }
3475
3476    fn simplify_with_guarantee(
3477        expr: Expr,
3478        guarantees: Vec<(Expr, NullableInterval)>,
3479    ) -> Expr {
3480        let schema = expr_test_schema();
3481        let execution_props = ExecutionProps::new();
3482        let simplifier = ExprSimplifier::new(
3483            SimplifyContext::new(&execution_props).with_schema(schema),
3484        )
3485        .with_guarantees(guarantees);
3486        simplifier.simplify(expr).unwrap()
3487    }
3488
3489    fn expr_test_schema() -> DFSchemaRef {
3490        static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3491            Arc::new(
3492                DFSchema::from_unqualified_fields(
3493                    vec![
3494                        Field::new("c1", DataType::Utf8, true),
3495                        Field::new("c2", DataType::Boolean, true),
3496                        Field::new("c3", DataType::Int64, true),
3497                        Field::new("c4", DataType::UInt32, true),
3498                        Field::new("c1_non_null", DataType::Utf8, false),
3499                        Field::new("c2_non_null", DataType::Boolean, false),
3500                        Field::new("c3_non_null", DataType::Int64, false),
3501                        Field::new("c4_non_null", DataType::UInt32, false),
3502                        Field::new("c5", DataType::FixedSizeBinary(3), true),
3503                    ]
3504                    .into(),
3505                    HashMap::new(),
3506                )
3507                .unwrap(),
3508            )
3509        });
3510        Arc::clone(&EXPR_TEST_SCHEMA)
3511    }
3512
3513    #[test]
3514    fn simplify_expr_null_comparison() {
3515        // x = null is always null
3516        assert_eq!(
3517            simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3518            lit(ScalarValue::Boolean(None)),
3519        );
3520
3521        // null != null is always null
3522        assert_eq!(
3523            simplify(
3524                lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3525            ),
3526            lit(ScalarValue::Boolean(None)),
3527        );
3528
3529        // x != null is always null
3530        assert_eq!(
3531            simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3532            lit(ScalarValue::Boolean(None)),
3533        );
3534
3535        // null = x is always null
3536        assert_eq!(
3537            simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3538            lit(ScalarValue::Boolean(None)),
3539        );
3540    }
3541
3542    #[test]
3543    fn simplify_expr_is_not_null() {
3544        assert_eq!(
3545            simplify(Expr::IsNotNull(Box::new(col("c1")))),
3546            Expr::IsNotNull(Box::new(col("c1")))
3547        );
3548
3549        // 'c1_non_null IS NOT NULL' is always true
3550        assert_eq!(
3551            simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3552            lit(true)
3553        );
3554    }
3555
3556    #[test]
3557    fn simplify_expr_is_null() {
3558        assert_eq!(
3559            simplify(Expr::IsNull(Box::new(col("c1")))),
3560            Expr::IsNull(Box::new(col("c1")))
3561        );
3562
3563        // 'c1_non_null IS NULL' is always false
3564        assert_eq!(
3565            simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3566            lit(false)
3567        );
3568    }
3569
3570    #[test]
3571    fn simplify_expr_is_unknown() {
3572        assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3573
3574        // 'c2_non_null is unknown' is always false
3575        assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3576    }
3577
3578    #[test]
3579    fn simplify_expr_is_not_known() {
3580        assert_eq!(
3581            simplify(col("c2").is_not_unknown()),
3582            col("c2").is_not_unknown()
3583        );
3584
3585        // 'c2_non_null is not unknown' is always true
3586        assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3587    }
3588
3589    #[test]
3590    fn simplify_expr_eq() {
3591        let schema = expr_test_schema();
3592        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3593
3594        // true = true -> true
3595        assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3596
3597        // true = false -> false
3598        assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3599
3600        // c2 = true -> c2
3601        assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3602
3603        // c2 = false => !c2
3604        assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3605    }
3606
3607    #[test]
3608    fn simplify_expr_eq_skip_nonboolean_type() {
3609        let schema = expr_test_schema();
3610
3611        // When one of the operand is not of boolean type, folding the
3612        // other boolean constant will change return type of
3613        // expression to non-boolean.
3614        //
3615        // Make sure c1 column to be used in tests is not boolean type
3616        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3617
3618        // don't fold c1 = foo
3619        assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3620    }
3621
3622    #[test]
3623    fn simplify_expr_not_eq() {
3624        let schema = expr_test_schema();
3625
3626        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3627
3628        // c2 != true -> !c2
3629        assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3630
3631        // c2 != false -> c2
3632        assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3633
3634        // test constant
3635        assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3636
3637        assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3638    }
3639
3640    #[test]
3641    fn simplify_expr_not_eq_skip_nonboolean_type() {
3642        let schema = expr_test_schema();
3643
3644        // when one of the operand is not of boolean type, folding the
3645        // other boolean constant will change return type of
3646        // expression to non-boolean.
3647        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3648
3649        assert_eq!(
3650            simplify(col("c1").not_eq(lit("foo"))),
3651            col("c1").not_eq(lit("foo")),
3652        );
3653    }
3654
3655    #[test]
3656    fn simplify_literal_case_equality() {
3657        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok"
3658        let simple_case = Expr::Case(Case::new(
3659            None,
3660            vec![(
3661                Box::new(col("c2_non_null").not_eq(lit(false))),
3662                Box::new(lit("ok")),
3663            )],
3664            Some(Box::new(lit("not_ok"))),
3665        ));
3666
3667        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" == "ok"
3668        // -->
3669        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok"
3670        // -->
3671        // CASE WHEN c2 != false THEN true ELSE false
3672        // -->
3673        // c2
3674        assert_eq!(
3675            simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
3676            col("c2_non_null"),
3677        );
3678
3679        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" != "ok"
3680        // -->
3681        // NOT(CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok")
3682        // -->
3683        // NOT(CASE WHEN c2 != false THEN true ELSE false)
3684        // -->
3685        // NOT(c2)
3686        assert_eq!(
3687            simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
3688            not(col("c2_non_null")),
3689        );
3690
3691        let complex_case = Expr::Case(Case::new(
3692            None,
3693            vec![
3694                (
3695                    Box::new(col("c1").eq(lit("inboxed"))),
3696                    Box::new(lit("pending")),
3697                ),
3698                (
3699                    Box::new(col("c1").eq(lit("scheduled"))),
3700                    Box::new(lit("pending")),
3701                ),
3702                (
3703                    Box::new(col("c1").eq(lit("completed"))),
3704                    Box::new(lit("completed")),
3705                ),
3706                (
3707                    Box::new(col("c1").eq(lit("paused"))),
3708                    Box::new(lit("paused")),
3709                ),
3710                (Box::new(col("c2")), Box::new(lit("running"))),
3711                (
3712                    Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
3713                    Box::new(lit("backing-off")),
3714                ),
3715            ],
3716            Some(Box::new(lit("ready"))),
3717        ));
3718
3719        assert_eq!(
3720            simplify(binary_expr(
3721                complex_case.clone(),
3722                Operator::Eq,
3723                lit("completed"),
3724            )),
3725            not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
3726                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3727                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3728            )
3729        );
3730
3731        assert_eq!(
3732            simplify(binary_expr(
3733                complex_case.clone(),
3734                Operator::NotEq,
3735                lit("completed"),
3736            )),
3737            distinct_from(col("c1").eq(lit("completed")), lit(true))
3738                .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3739                    .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
3740        );
3741
3742        assert_eq!(
3743            simplify(binary_expr(
3744                complex_case.clone(),
3745                Operator::Eq,
3746                lit("running"),
3747            )),
3748            not_distinct_from(col("c2"), lit(true)).and(
3749                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3750                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3751                    .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3752                    .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3753            )
3754        );
3755
3756        assert_eq!(
3757            simplify(binary_expr(
3758                complex_case.clone(),
3759                Operator::Eq,
3760                lit("ready"),
3761            )),
3762            distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3763                .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3764                .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3765                .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3766                .and(distinct_from(col("c2"), lit(true)))
3767                .and(distinct_from(
3768                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3769                    lit(true)
3770                ))
3771        );
3772
3773        assert_eq!(
3774            simplify(binary_expr(
3775                complex_case.clone(),
3776                Operator::NotEq,
3777                lit("ready"),
3778            )),
3779            not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3780                .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3781                .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
3782                .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
3783                .or(not_distinct_from(col("c2"), lit(true)))
3784                .or(not_distinct_from(
3785                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3786                    lit(true)
3787                ))
3788        );
3789    }
3790
3791    #[test]
3792    fn simplify_expr_case_when_then_else() {
3793        // CASE WHEN c2 != false THEN "ok" == "not_ok" ELSE c2 == true
3794        // -->
3795        // CASE WHEN c2 THEN false ELSE c2
3796        // -->
3797        // false
3798        assert_eq!(
3799            simplify(Expr::Case(Case::new(
3800                None,
3801                vec![(
3802                    Box::new(col("c2_non_null").not_eq(lit(false))),
3803                    Box::new(lit("ok").eq(lit("not_ok"))),
3804                )],
3805                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3806            ))),
3807            lit(false) // #1716
3808        );
3809
3810        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE c2
3811        // -->
3812        // CASE WHEN c2 THEN true ELSE c2
3813        // -->
3814        // c2
3815        //
3816        // Need to call simplify 2x due to
3817        // https://github.com/apache/datafusion/issues/1160
3818        assert_eq!(
3819            simplify(simplify(Expr::Case(Case::new(
3820                None,
3821                vec![(
3822                    Box::new(col("c2_non_null").not_eq(lit(false))),
3823                    Box::new(lit("ok").eq(lit("ok"))),
3824                )],
3825                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3826            )))),
3827            col("c2_non_null")
3828        );
3829
3830        // CASE WHEN ISNULL(c2) THEN true ELSE c2
3831        // -->
3832        // ISNULL(c2) OR c2
3833        //
3834        // Need to call simplify 2x due to
3835        // https://github.com/apache/datafusion/issues/1160
3836        assert_eq!(
3837            simplify(simplify(Expr::Case(Case::new(
3838                None,
3839                vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3840                Some(Box::new(col("c2"))),
3841            )))),
3842            col("c2")
3843                .is_null()
3844                .or(col("c2").is_not_null().and(col("c2")))
3845        );
3846
3847        // CASE WHEN c1 then true WHEN c2 then false ELSE true
3848        // --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
3849        // --> c1 OR (NOT(c1) AND NOT(c2))
3850        // --> c1 OR NOT(c2)
3851        //
3852        // Need to call simplify 2x due to
3853        // https://github.com/apache/datafusion/issues/1160
3854        assert_eq!(
3855            simplify(simplify(Expr::Case(Case::new(
3856                None,
3857                vec![
3858                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3859                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3860                ],
3861                Some(Box::new(lit(true))),
3862            )))),
3863            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3864        );
3865
3866        // CASE WHEN c1 then true WHEN c2 then true ELSE false
3867        // --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
3868        // --> c1 OR (NOT(c1) AND c2)
3869        // --> c1 OR c2
3870        //
3871        // Need to call simplify 2x due to
3872        // https://github.com/apache/datafusion/issues/1160
3873        assert_eq!(
3874            simplify(simplify(Expr::Case(Case::new(
3875                None,
3876                vec![
3877                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3878                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3879                ],
3880                Some(Box::new(lit(true))),
3881            )))),
3882            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3883        );
3884
3885        // CASE WHEN c > 0 THEN true END AS c1
3886        assert_eq!(
3887            simplify(simplify(Expr::Case(Case::new(
3888                None,
3889                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3890                None,
3891            )))),
3892            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
3893                col("c3").gt(lit(0_i64)),
3894                lit(true)
3895            )
3896            .and(lit_bool_null()))
3897        );
3898
3899        // CASE WHEN c > 0 THEN true ELSE false END AS c1
3900        assert_eq!(
3901            simplify(simplify(Expr::Case(Case::new(
3902                None,
3903                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3904                Some(Box::new(lit(false))),
3905            )))),
3906            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
3907        );
3908    }
3909
3910    #[test]
3911    fn simplify_expr_case_when_first_true() {
3912        // CASE WHEN true THEN 1 ELSE c1 END --> 1
3913        assert_eq!(
3914            simplify(Expr::Case(Case::new(
3915                None,
3916                vec![(Box::new(lit(true)), Box::new(lit(1)),)],
3917                Some(Box::new(col("c1"))),
3918            ))),
3919            lit(1)
3920        );
3921
3922        // CASE WHEN true THEN col('a') ELSE col('b') END --> col('a')
3923        assert_eq!(
3924            simplify(Expr::Case(Case::new(
3925                None,
3926                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
3927                Some(Box::new(lit("b"))),
3928            ))),
3929            lit("a")
3930        );
3931
3932        // CASE WHEN true THEN col('a') WHEN col('x') > 5 THEN col('b') ELSE col('c') END --> col('a')
3933        assert_eq!(
3934            simplify(Expr::Case(Case::new(
3935                None,
3936                vec![
3937                    (Box::new(lit(true)), Box::new(lit("a"))),
3938                    (Box::new(lit("x").gt(lit(5))), Box::new(lit("b"))),
3939                ],
3940                Some(Box::new(lit("c"))),
3941            ))),
3942            lit("a")
3943        );
3944
3945        // CASE WHEN true THEN col('a') END --> col('a') (no else clause)
3946        assert_eq!(
3947            simplify(Expr::Case(Case::new(
3948                None,
3949                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
3950                None,
3951            ))),
3952            lit("a")
3953        );
3954
3955        // Negative test: CASE WHEN c2 THEN 1 ELSE 2 END should not be simplified
3956        let expr = Expr::Case(Case::new(
3957            None,
3958            vec![(Box::new(col("c2")), Box::new(lit(1)))],
3959            Some(Box::new(lit(2))),
3960        ));
3961        assert_eq!(simplify(expr.clone()), expr);
3962
3963        // Negative test: CASE WHEN false THEN 1 ELSE 2 END should not use this rule
3964        let expr = Expr::Case(Case::new(
3965            None,
3966            vec![(Box::new(lit(false)), Box::new(lit(1)))],
3967            Some(Box::new(lit(2))),
3968        ));
3969        assert_ne!(simplify(expr), lit(1));
3970
3971        // Negative test: CASE WHEN col('c1') > 5 THEN 1 ELSE 2 END should not be simplified
3972        let expr = Expr::Case(Case::new(
3973            None,
3974            vec![(Box::new(col("c1").gt(lit(5))), Box::new(lit(1)))],
3975            Some(Box::new(lit(2))),
3976        ));
3977        assert_eq!(simplify(expr.clone()), expr);
3978    }
3979
3980    #[test]
3981    fn simplify_expr_case_when_any_true() {
3982        // CASE WHEN c3 > 0 THEN 'a' WHEN true THEN 'b' ELSE 'c' END --> CASE WHEN c3 > 0 THEN 'a' ELSE 'b' END
3983        assert_eq!(
3984            simplify(Expr::Case(Case::new(
3985                None,
3986                vec![
3987                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
3988                    (Box::new(lit(true)), Box::new(lit("b"))),
3989                ],
3990                Some(Box::new(lit("c"))),
3991            ))),
3992            Expr::Case(Case::new(
3993                None,
3994                vec![(Box::new(col("c3").gt(lit(0))), Box::new(lit("a")))],
3995                Some(Box::new(lit("b"))),
3996            ))
3997        );
3998
3999        // CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' WHEN true THEN 'c' WHEN c3 = 0 THEN 'd' ELSE 'e' END
4000        // --> CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' ELSE 'c' END
4001        assert_eq!(
4002            simplify(Expr::Case(Case::new(
4003                None,
4004                vec![
4005                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4006                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4007                    (Box::new(lit(true)), Box::new(lit("c"))),
4008                    (Box::new(col("c3").eq(lit(0))), Box::new(lit("d"))),
4009                ],
4010                Some(Box::new(lit("e"))),
4011            ))),
4012            Expr::Case(Case::new(
4013                None,
4014                vec![
4015                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4016                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4017                ],
4018                Some(Box::new(lit("c"))),
4019            ))
4020        );
4021
4022        // CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 WHEN true THEN 3 END (no else)
4023        // --> CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 ELSE 3 END
4024        assert_eq!(
4025            simplify(Expr::Case(Case::new(
4026                None,
4027                vec![
4028                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4029                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4030                    (Box::new(lit(true)), Box::new(lit(3))),
4031                ],
4032                None,
4033            ))),
4034            Expr::Case(Case::new(
4035                None,
4036                vec![
4037                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4038                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4039                ],
4040                Some(Box::new(lit(3))),
4041            ))
4042        );
4043
4044        // Negative test: CASE WHEN c3 > 0 THEN c3 WHEN c4 < 0 THEN 2 ELSE 3 END should not be simplified
4045        let expr = Expr::Case(Case::new(
4046            None,
4047            vec![
4048                (Box::new(col("c3").gt(lit(0))), Box::new(col("c3"))),
4049                (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4050            ],
4051            Some(Box::new(lit(3))),
4052        ));
4053        assert_eq!(simplify(expr.clone()), expr);
4054    }
4055
4056    #[test]
4057    fn simplify_expr_case_when_any_false() {
4058        // CASE WHEN false THEN 'a' END --> NULL
4059        assert_eq!(
4060            simplify(Expr::Case(Case::new(
4061                None,
4062                vec![(Box::new(lit(false)), Box::new(lit("a")))],
4063                None,
4064            ))),
4065            Expr::Literal(ScalarValue::Utf8(None), None)
4066        );
4067
4068        // CASE WHEN false THEN 2 ELSE 1 END --> 1
4069        assert_eq!(
4070            simplify(Expr::Case(Case::new(
4071                None,
4072                vec![(Box::new(lit(false)), Box::new(lit(2)))],
4073                Some(Box::new(lit(1))),
4074            ))),
4075            lit(1),
4076        );
4077
4078        // CASE WHEN c3 < 10 THEN 'b' WHEN false then c3 ELSE c4 END --> CASE WHEN c3 < 10 THEN b ELSE c4 END
4079        assert_eq!(
4080            simplify(Expr::Case(Case::new(
4081                None,
4082                vec![
4083                    (Box::new(col("c3").lt(lit(10))), Box::new(lit("b"))),
4084                    (Box::new(lit(false)), Box::new(col("c3"))),
4085                ],
4086                Some(Box::new(col("c4"))),
4087            ))),
4088            Expr::Case(Case::new(
4089                None,
4090                vec![(Box::new(col("c3").lt(lit(10))), Box::new(lit("b")))],
4091                Some(Box::new(col("c4"))),
4092            ))
4093        );
4094
4095        // Negative test: CASE WHEN c3 = 4 THEN 1 ELSE 2 END should not be simplified
4096        let expr = Expr::Case(Case::new(
4097            None,
4098            vec![(Box::new(col("c3").eq(lit(4))), Box::new(lit(1)))],
4099            Some(Box::new(lit(2))),
4100        ));
4101        assert_eq!(simplify(expr.clone()), expr);
4102    }
4103
4104    fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4105        Expr::BinaryExpr(BinaryExpr {
4106            left: Box::new(left.into()),
4107            op: Operator::IsDistinctFrom,
4108            right: Box::new(right.into()),
4109        })
4110    }
4111
4112    fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4113        Expr::BinaryExpr(BinaryExpr {
4114            left: Box::new(left.into()),
4115            op: Operator::IsNotDistinctFrom,
4116            right: Box::new(right.into()),
4117        })
4118    }
4119
4120    #[test]
4121    fn simplify_expr_bool_or() {
4122        // col || true is always true
4123        assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
4124
4125        // col || false is always col
4126        assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
4127
4128        // true || null is always true
4129        assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
4130
4131        // null || true is always true
4132        assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
4133
4134        // false || null is always null
4135        assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
4136
4137        // null || false is always null
4138        assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
4139
4140        // ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
4141        // it can be either NULL or  TRUE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4142        // and should not be rewritten
4143        let expr = col("c1").between(lit(0), lit(10));
4144        let expr = expr.or(lit_bool_null());
4145        let result = simplify(expr);
4146
4147        let expected_expr = or(
4148            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4149            lit_bool_null(),
4150        );
4151        assert_eq!(expected_expr, result);
4152    }
4153
4154    #[test]
4155    fn simplify_inlist() {
4156        assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
4157        assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
4158
4159        // null in (...)  --> null
4160        assert_eq!(
4161            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
4162            lit_bool_null()
4163        );
4164
4165        // null not in (...)  --> null
4166        assert_eq!(
4167            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
4168            lit_bool_null()
4169        );
4170
4171        assert_eq!(
4172            simplify(in_list(col("c1"), vec![lit(1)], false)),
4173            col("c1").eq(lit(1))
4174        );
4175        assert_eq!(
4176            simplify(in_list(col("c1"), vec![lit(1)], true)),
4177            col("c1").not_eq(lit(1))
4178        );
4179
4180        // more complex expressions can be simplified if list contains
4181        // one element only
4182        assert_eq!(
4183            simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
4184            (col("c1") * lit(10)).eq(lit(2))
4185        );
4186
4187        assert_eq!(
4188            simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
4189            col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
4190        );
4191        assert_eq!(
4192            simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
4193            col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
4194        );
4195
4196        let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
4197        assert_eq!(
4198            simplify(in_list(
4199                col("c1"),
4200                vec![scalar_subquery(Arc::clone(&subquery))],
4201                false
4202            )),
4203            in_subquery(col("c1"), Arc::clone(&subquery))
4204        );
4205        assert_eq!(
4206            simplify(in_list(
4207                col("c1"),
4208                vec![scalar_subquery(Arc::clone(&subquery))],
4209                true
4210            )),
4211            not_in_subquery(col("c1"), subquery)
4212        );
4213
4214        let subquery1 =
4215            scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
4216        let subquery2 =
4217            scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
4218
4219        // c1 NOT IN (<subquery1>, <subquery2>) -> c1 != <subquery1> AND c1 != <subquery2>
4220        assert_eq!(
4221            simplify(in_list(
4222                col("c1"),
4223                vec![subquery1.clone(), subquery2.clone()],
4224                true
4225            )),
4226            col("c1")
4227                .not_eq(subquery1.clone())
4228                .and(col("c1").not_eq(subquery2.clone()))
4229        );
4230
4231        // c1 IN (<subquery1>, <subquery2>) -> c1 == <subquery1> OR c1 == <subquery2>
4232        assert_eq!(
4233            simplify(in_list(
4234                col("c1"),
4235                vec![subquery1.clone(), subquery2.clone()],
4236                false
4237            )),
4238            col("c1").eq(subquery1).or(col("c1").eq(subquery2))
4239        );
4240
4241        // 1. c1 IN (1,2,3,4) AND c1 IN (5,6,7,8) -> false
4242        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4243            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
4244        );
4245        assert_eq!(simplify(expr), lit(false));
4246
4247        // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4
4248        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4249            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
4250        );
4251        assert_eq!(simplify(expr), col("c1").eq(lit(4)));
4252
4253        // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true
4254        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4255            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4256        );
4257        assert_eq!(simplify(expr), lit(true));
4258
4259        // 3.5 c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (4, 5, 6, 7) -> c1 != 4 (4 overlaps)
4260        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4261            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4262        );
4263        assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
4264
4265        // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7)
4266        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4267            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4268        );
4269        assert_eq!(
4270            simplify(expr),
4271            in_list(
4272                col("c1"),
4273                vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
4274                true
4275            )
4276        );
4277
4278        // 5. c1 IN (1,2,3,4) OR c1 IN (2,3,4,5) -> c1 IN (1,2,3,4,5)
4279        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
4280            in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
4281        );
4282        assert_eq!(
4283            simplify(expr),
4284            in_list(
4285                col("c1"),
4286                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4287                false
4288            )
4289        );
4290
4291        // 6. c1 IN (1,2,3) AND c1 NOT INT (1,2,3,4,5) -> false
4292        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
4293            col("c1"),
4294            vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4295            true,
4296        ));
4297        assert_eq!(simplify(expr), lit(false));
4298
4299        // 7. c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5
4300        let expr =
4301            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
4302                col("c1"),
4303                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4304                false,
4305            ));
4306        assert_eq!(simplify(expr), col("c1").eq(lit(5)));
4307
4308        // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4)
4309        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4310            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4311        );
4312        assert_eq!(
4313            simplify(expr),
4314            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
4315        );
4316
4317        // inlist with more than two expressions
4318        // c1 IN (1,2,3,4,5,6) AND c1 IN (1,3,5,6) AND c1 IN (3,6) -> c1 = 3 OR c1 = 6
4319        let expr = in_list(
4320            col("c1"),
4321            vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
4322            false,
4323        )
4324        .and(in_list(
4325            col("c1"),
4326            vec![lit(1), lit(3), lit(5), lit(6)],
4327            false,
4328        ))
4329        .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
4330        assert_eq!(
4331            simplify(expr),
4332            col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
4333        );
4334
4335        // c1 NOT IN (1,2,3,4) AND c1 IN (5,6,7,8) AND c1 NOT IN (3,4,5,6) AND c1 IN (8,9,10) -> c1 = 8
4336        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4337            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
4338                .and(in_list(
4339                    col("c1"),
4340                    vec![lit(3), lit(4), lit(5), lit(6)],
4341                    true,
4342                ))
4343                .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
4344        );
4345        assert_eq!(simplify(expr), col("c1").eq(lit(8)));
4346
4347        // Contains non-InList expression
4348        // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9)
4349        let expr =
4350            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
4351                .not_eq(lit(5))
4352                .or(in_list(
4353                    col("c1"),
4354                    vec![lit(6), lit(7), lit(8), lit(9)],
4355                    true,
4356                )));
4357        // TODO: Further simplify this expression
4358        // https://github.com/apache/datafusion/issues/8970
4359        // assert_eq!(simplify(expr.clone()), lit(true));
4360        assert_eq!(simplify(expr.clone()), expr);
4361    }
4362
4363    #[test]
4364    fn simplify_null_in_empty_inlist() {
4365        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4366        let expr = in_list(lit_bool_null(), vec![], false);
4367        assert_eq!(simplify(expr), lit(false));
4368
4369        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4370        let expr = in_list(lit_bool_null(), vec![], true);
4371        assert_eq!(simplify(expr), lit(true));
4372
4373        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4374        let null_null = || Expr::Literal(ScalarValue::Null, None);
4375        let expr = in_list(null_null(), vec![], false);
4376        assert_eq!(simplify(expr), lit(false));
4377
4378        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4379        let expr = in_list(null_null(), vec![], true);
4380        assert_eq!(simplify(expr), lit(true));
4381    }
4382
4383    #[test]
4384    fn just_simplifier_simplify_null_in_empty_inlist() {
4385        let simplify = |expr: Expr| -> Expr {
4386            let schema = expr_test_schema();
4387            let execution_props = ExecutionProps::new();
4388            let info = SimplifyContext::new(&execution_props).with_schema(schema);
4389            let simplifier = &mut Simplifier::new(&info);
4390            expr.rewrite(simplifier)
4391                .expect("Failed to simplify expression")
4392                .data
4393        };
4394
4395        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4396        let expr = in_list(lit_bool_null(), vec![], false);
4397        assert_eq!(simplify(expr), lit(false));
4398
4399        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4400        let expr = in_list(lit_bool_null(), vec![], true);
4401        assert_eq!(simplify(expr), lit(true));
4402
4403        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4404        let null_null = || Expr::Literal(ScalarValue::Null, None);
4405        let expr = in_list(null_null(), vec![], false);
4406        assert_eq!(simplify(expr), lit(false));
4407
4408        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4409        let expr = in_list(null_null(), vec![], true);
4410        assert_eq!(simplify(expr), lit(true));
4411    }
4412
4413    #[test]
4414    fn simplify_large_or() {
4415        let expr = (0..5)
4416            .map(|i| col("c1").eq(lit(i)))
4417            .fold(lit(false), |acc, e| acc.or(e));
4418        assert_eq!(
4419            simplify(expr),
4420            in_list(col("c1"), (0..5).map(lit).collect(), false),
4421        );
4422    }
4423
4424    #[test]
4425    fn simplify_expr_bool_and() {
4426        // col & true is always col
4427        assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
4428        // col & false is always false
4429        assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
4430
4431        // true && null is always null
4432        assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
4433
4434        // null && true is always null
4435        assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
4436
4437        // false && null is always false
4438        assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
4439
4440        // null && false is always false
4441        assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
4442
4443        // c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
4444        // it can be either NULL or FALSE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4445        // and the Boolean(NULL) should remain
4446        let expr = col("c1").between(lit(0), lit(10));
4447        let expr = expr.and(lit_bool_null());
4448        let result = simplify(expr);
4449
4450        let expected_expr = and(
4451            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4452            lit_bool_null(),
4453        );
4454        assert_eq!(expected_expr, result);
4455    }
4456
4457    #[test]
4458    fn simplify_expr_between() {
4459        // c2 between 3 and 4 is c2 >= 3 and c2 <= 4
4460        let expr = col("c2").between(lit(3), lit(4));
4461        assert_eq!(
4462            simplify(expr),
4463            and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
4464        );
4465
4466        // c2 not between 3 and 4 is c2 < 3 or c2 > 4
4467        let expr = col("c2").not_between(lit(3), lit(4));
4468        assert_eq!(
4469            simplify(expr),
4470            or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
4471        );
4472    }
4473
4474    #[test]
4475    fn test_like_and_ilike() {
4476        let null = lit(ScalarValue::Utf8(None));
4477
4478        // expr [NOT] [I]LIKE NULL
4479        let expr = col("c1").like(null.clone());
4480        assert_eq!(simplify(expr), lit_bool_null());
4481
4482        let expr = col("c1").not_like(null.clone());
4483        assert_eq!(simplify(expr), lit_bool_null());
4484
4485        let expr = col("c1").ilike(null.clone());
4486        assert_eq!(simplify(expr), lit_bool_null());
4487
4488        let expr = col("c1").not_ilike(null.clone());
4489        assert_eq!(simplify(expr), lit_bool_null());
4490
4491        // expr [NOT] [I]LIKE '%'
4492        let expr = col("c1").like(lit("%"));
4493        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4494
4495        let expr = col("c1").not_like(lit("%"));
4496        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4497
4498        let expr = col("c1").ilike(lit("%"));
4499        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4500
4501        let expr = col("c1").not_ilike(lit("%"));
4502        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4503
4504        // expr [NOT] [I]LIKE '%%'
4505        let expr = col("c1").like(lit("%%"));
4506        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4507
4508        let expr = col("c1").not_like(lit("%%"));
4509        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4510
4511        let expr = col("c1").ilike(lit("%%"));
4512        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4513
4514        let expr = col("c1").not_ilike(lit("%%"));
4515        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4516
4517        // not_null_expr [NOT] [I]LIKE '%'
4518        let expr = col("c1_non_null").like(lit("%"));
4519        assert_eq!(simplify(expr), lit(true));
4520
4521        let expr = col("c1_non_null").not_like(lit("%"));
4522        assert_eq!(simplify(expr), lit(false));
4523
4524        let expr = col("c1_non_null").ilike(lit("%"));
4525        assert_eq!(simplify(expr), lit(true));
4526
4527        let expr = col("c1_non_null").not_ilike(lit("%"));
4528        assert_eq!(simplify(expr), lit(false));
4529
4530        // not_null_expr [NOT] [I]LIKE '%%'
4531        let expr = col("c1_non_null").like(lit("%%"));
4532        assert_eq!(simplify(expr), lit(true));
4533
4534        let expr = col("c1_non_null").not_like(lit("%%"));
4535        assert_eq!(simplify(expr), lit(false));
4536
4537        let expr = col("c1_non_null").ilike(lit("%%"));
4538        assert_eq!(simplify(expr), lit(true));
4539
4540        let expr = col("c1_non_null").not_ilike(lit("%%"));
4541        assert_eq!(simplify(expr), lit(false));
4542
4543        // null_constant [NOT] [I]LIKE '%'
4544        let expr = null.clone().like(lit("%"));
4545        assert_eq!(simplify(expr), lit_bool_null());
4546
4547        let expr = null.clone().not_like(lit("%"));
4548        assert_eq!(simplify(expr), lit_bool_null());
4549
4550        let expr = null.clone().ilike(lit("%"));
4551        assert_eq!(simplify(expr), lit_bool_null());
4552
4553        let expr = null.clone().not_ilike(lit("%"));
4554        assert_eq!(simplify(expr), lit_bool_null());
4555
4556        // null_constant [NOT] [I]LIKE '%%'
4557        let expr = null.clone().like(lit("%%"));
4558        assert_eq!(simplify(expr), lit_bool_null());
4559
4560        let expr = null.clone().not_like(lit("%%"));
4561        assert_eq!(simplify(expr), lit_bool_null());
4562
4563        let expr = null.clone().ilike(lit("%%"));
4564        assert_eq!(simplify(expr), lit_bool_null());
4565
4566        let expr = null.clone().not_ilike(lit("%%"));
4567        assert_eq!(simplify(expr), lit_bool_null());
4568
4569        // null_constant [NOT] [I]LIKE 'a%'
4570        let expr = null.clone().like(lit("a%"));
4571        assert_eq!(simplify(expr), lit_bool_null());
4572
4573        let expr = null.clone().not_like(lit("a%"));
4574        assert_eq!(simplify(expr), lit_bool_null());
4575
4576        let expr = null.clone().ilike(lit("a%"));
4577        assert_eq!(simplify(expr), lit_bool_null());
4578
4579        let expr = null.clone().not_ilike(lit("a%"));
4580        assert_eq!(simplify(expr), lit_bool_null());
4581
4582        // expr [NOT] [I]LIKE with pattern without wildcards
4583        let expr = col("c1").like(lit("a"));
4584        assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4585        let expr = col("c1").not_like(lit("a"));
4586        assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4587        let expr = col("c1").like(lit("a_"));
4588        assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4589        let expr = col("c1").not_like(lit("a_"));
4590        assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4591
4592        let expr = col("c1").ilike(lit("a"));
4593        assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4594        let expr = col("c1").not_ilike(lit("a"));
4595        assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4596    }
4597
4598    #[test]
4599    fn test_simplify_with_guarantee() {
4600        // (c3 >= 3) AND (c4 + 2 < 10 OR (c1 NOT IN ("a", "b")))
4601        let expr_x = col("c3").gt(lit(3_i64));
4602        let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4603        let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4604        let expr = expr_x.clone().and(expr_y.or(expr_z));
4605
4606        // All guaranteed null
4607        let guarantees = vec![
4608            (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4609            (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4610            (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4611        ];
4612
4613        let output = simplify_with_guarantee(expr.clone(), guarantees);
4614        assert_eq!(output, lit_bool_null());
4615
4616        // All guaranteed false
4617        let guarantees = vec![
4618            (
4619                col("c3"),
4620                NullableInterval::NotNull {
4621                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4622                },
4623            ),
4624            (
4625                col("c4"),
4626                NullableInterval::from(ScalarValue::UInt32(Some(9))),
4627            ),
4628            (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4629        ];
4630        let output = simplify_with_guarantee(expr.clone(), guarantees);
4631        assert_eq!(output, lit(false));
4632
4633        // Guaranteed false or null -> no change.
4634        let guarantees = vec![
4635            (
4636                col("c3"),
4637                NullableInterval::MaybeNull {
4638                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4639                },
4640            ),
4641            (
4642                col("c4"),
4643                NullableInterval::MaybeNull {
4644                    values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4645                },
4646            ),
4647            (
4648                col("c1"),
4649                NullableInterval::NotNull {
4650                    values: Interval::try_new(
4651                        ScalarValue::from("d"),
4652                        ScalarValue::from("f"),
4653                    )
4654                    .unwrap(),
4655                },
4656            ),
4657        ];
4658        let output = simplify_with_guarantee(expr.clone(), guarantees);
4659        assert_eq!(&output, &expr_x);
4660
4661        // Sufficient true guarantees
4662        let guarantees = vec![
4663            (
4664                col("c3"),
4665                NullableInterval::from(ScalarValue::Int64(Some(9))),
4666            ),
4667            (
4668                col("c4"),
4669                NullableInterval::from(ScalarValue::UInt32(Some(3))),
4670            ),
4671        ];
4672        let output = simplify_with_guarantee(expr.clone(), guarantees);
4673        assert_eq!(output, lit(true));
4674
4675        // Only partially simplify
4676        let guarantees = vec![(
4677            col("c4"),
4678            NullableInterval::from(ScalarValue::UInt32(Some(3))),
4679        )];
4680        let output = simplify_with_guarantee(expr, guarantees);
4681        assert_eq!(&output, &expr_x);
4682    }
4683
4684    #[test]
4685    fn test_expression_partial_simplify_1() {
4686        // (1 + 2) + (4 / 0) -> 3 + (4 / 0)
4687        let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4688        let expected = (lit(3)) + (lit(4) / lit(0));
4689
4690        assert_eq!(simplify(expr), expected);
4691    }
4692
4693    #[test]
4694    fn test_expression_partial_simplify_2() {
4695        // (1 > 2) and (4 / 0) -> false
4696        let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4697        let expected = lit(false);
4698
4699        assert_eq!(simplify(expr), expected);
4700    }
4701
4702    #[test]
4703    fn test_simplify_cycles() {
4704        // TRUE
4705        let expr = lit(true);
4706        let expected = lit(true);
4707        let (expr, num_iter) = simplify_with_cycle_count(expr);
4708        assert_eq!(expr, expected);
4709        assert_eq!(num_iter, 1);
4710
4711        // (true != NULL) OR (5 > 10)
4712        let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4713        let expected = lit_bool_null();
4714        let (expr, num_iter) = simplify_with_cycle_count(expr);
4715        assert_eq!(expr, expected);
4716        assert_eq!(num_iter, 2);
4717
4718        // NOTE: this currently does not simplify
4719        // (((c4 - 10) + 10) *100) / 100
4720        let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4721        let expected = expr.clone();
4722        let (expr, num_iter) = simplify_with_cycle_count(expr);
4723        assert_eq!(expr, expected);
4724        assert_eq!(num_iter, 1);
4725
4726        // ((c4<1 or c3<2) and c3_non_null<3) and false
4727        let expr = col("c4")
4728            .lt(lit(1))
4729            .or(col("c3").lt(lit(2)))
4730            .and(col("c3_non_null").lt(lit(3)))
4731            .and(lit(false));
4732        let expected = lit(false);
4733        let (expr, num_iter) = simplify_with_cycle_count(expr);
4734        assert_eq!(expr, expected);
4735        assert_eq!(num_iter, 2);
4736    }
4737
4738    fn boolean_test_schema() -> DFSchemaRef {
4739        static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4740            Schema::new(vec![
4741                Field::new("A", DataType::Boolean, false),
4742                Field::new("B", DataType::Boolean, false),
4743                Field::new("C", DataType::Boolean, false),
4744                Field::new("D", DataType::Boolean, false),
4745            ])
4746            .to_dfschema_ref()
4747            .unwrap()
4748        });
4749        Arc::clone(&BOOLEAN_TEST_SCHEMA)
4750    }
4751
4752    #[test]
4753    fn simplify_common_factor_conjunction_in_disjunction() {
4754        let props = ExecutionProps::new();
4755        let schema = boolean_test_schema();
4756        let simplifier =
4757            ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema));
4758
4759        let a = || col("A");
4760        let b = || col("B");
4761        let c = || col("C");
4762        let d = || col("D");
4763
4764        // (A AND B) OR (A AND C) -> A AND (B OR C)
4765        let expr = a().and(b()).or(a().and(c()));
4766        let expected = a().and(b().or(c()));
4767
4768        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4769
4770        // (A AND B) OR (A AND C) OR (A AND D) -> A AND (B OR C OR D)
4771        let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4772        let expected = a().and(b().or(c()).or(d()));
4773        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4774
4775        // A OR (B AND C AND A) -> A
4776        let expr = a().or(b().and(c().and(a())));
4777        let expected = a();
4778        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4779    }
4780
4781    #[test]
4782    fn test_simplify_udaf() {
4783        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4784        let aggregate_function_expr =
4785            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4786                udaf.into(),
4787                vec![],
4788                false,
4789                None,
4790                vec![],
4791                None,
4792            ));
4793
4794        let expected = col("result_column");
4795        assert_eq!(simplify(aggregate_function_expr), expected);
4796
4797        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4798        let aggregate_function_expr =
4799            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4800                udaf.into(),
4801                vec![],
4802                false,
4803                None,
4804                vec![],
4805                None,
4806            ));
4807
4808        let expected = aggregate_function_expr.clone();
4809        assert_eq!(simplify(aggregate_function_expr), expected);
4810    }
4811
4812    /// A Mock UDAF which defines `simplify` to be used in tests
4813    /// related to UDAF simplification
4814    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4815    struct SimplifyMockUdaf {
4816        simplify: bool,
4817    }
4818
4819    impl SimplifyMockUdaf {
4820        /// make simplify method return new expression
4821        fn new_with_simplify() -> Self {
4822            Self { simplify: true }
4823        }
4824        /// make simplify method return no change
4825        fn new_without_simplify() -> Self {
4826            Self { simplify: false }
4827        }
4828    }
4829
4830    impl AggregateUDFImpl for SimplifyMockUdaf {
4831        fn as_any(&self) -> &dyn std::any::Any {
4832            self
4833        }
4834
4835        fn name(&self) -> &str {
4836            "mock_simplify"
4837        }
4838
4839        fn signature(&self) -> &Signature {
4840            unimplemented!()
4841        }
4842
4843        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4844            unimplemented!("not needed for tests")
4845        }
4846
4847        fn accumulator(
4848            &self,
4849            _acc_args: AccumulatorArgs,
4850        ) -> Result<Box<dyn Accumulator>> {
4851            unimplemented!("not needed for tests")
4852        }
4853
4854        fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4855            unimplemented!("not needed for testing")
4856        }
4857
4858        fn create_groups_accumulator(
4859            &self,
4860            _args: AccumulatorArgs,
4861        ) -> Result<Box<dyn GroupsAccumulator>> {
4862            unimplemented!("not needed for testing")
4863        }
4864
4865        fn simplify(&self) -> Option<AggregateFunctionSimplification> {
4866            if self.simplify {
4867                Some(Box::new(|_, _| Ok(col("result_column"))))
4868            } else {
4869                None
4870            }
4871        }
4872    }
4873
4874    #[test]
4875    fn test_simplify_udwf() {
4876        let udwf = WindowFunctionDefinition::WindowUDF(
4877            WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(),
4878        );
4879        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4880
4881        let expected = col("result_column");
4882        assert_eq!(simplify(window_function_expr), expected);
4883
4884        let udwf = WindowFunctionDefinition::WindowUDF(
4885            WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(),
4886        );
4887        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4888
4889        let expected = window_function_expr.clone();
4890        assert_eq!(simplify(window_function_expr), expected);
4891    }
4892
4893    /// A Mock UDWF which defines `simplify` to be used in tests
4894    /// related to UDWF simplification
4895    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4896    struct SimplifyMockUdwf {
4897        simplify: bool,
4898    }
4899
4900    impl SimplifyMockUdwf {
4901        /// make simplify method return new expression
4902        fn new_with_simplify() -> Self {
4903            Self { simplify: true }
4904        }
4905        /// make simplify method return no change
4906        fn new_without_simplify() -> Self {
4907            Self { simplify: false }
4908        }
4909    }
4910
4911    impl WindowUDFImpl for SimplifyMockUdwf {
4912        fn as_any(&self) -> &dyn std::any::Any {
4913            self
4914        }
4915
4916        fn name(&self) -> &str {
4917            "mock_simplify"
4918        }
4919
4920        fn signature(&self) -> &Signature {
4921            unimplemented!()
4922        }
4923
4924        fn simplify(&self) -> Option<WindowFunctionSimplification> {
4925            if self.simplify {
4926                Some(Box::new(|_, _| Ok(col("result_column"))))
4927            } else {
4928                None
4929            }
4930        }
4931
4932        fn partition_evaluator(
4933            &self,
4934            _partition_evaluator_args: PartitionEvaluatorArgs,
4935        ) -> Result<Box<dyn PartitionEvaluator>> {
4936            unimplemented!("not needed for tests")
4937        }
4938
4939        fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
4940            unimplemented!("not needed for tests")
4941        }
4942
4943        fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
4944            LimitEffect::Unknown
4945        }
4946    }
4947    #[derive(Debug, PartialEq, Eq, Hash)]
4948    struct VolatileUdf {
4949        signature: Signature,
4950    }
4951
4952    impl VolatileUdf {
4953        pub fn new() -> Self {
4954            Self {
4955                signature: Signature::exact(vec![], Volatility::Volatile),
4956            }
4957        }
4958    }
4959    impl ScalarUDFImpl for VolatileUdf {
4960        fn as_any(&self) -> &dyn std::any::Any {
4961            self
4962        }
4963
4964        fn name(&self) -> &str {
4965            "VolatileUdf"
4966        }
4967
4968        fn signature(&self) -> &Signature {
4969            &self.signature
4970        }
4971
4972        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4973            Ok(DataType::Int16)
4974        }
4975
4976        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
4977            panic!("dummy - not implemented")
4978        }
4979    }
4980
4981    #[test]
4982    fn test_optimize_volatile_conditions() {
4983        let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
4984        let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
4985        {
4986            let expr = rand
4987                .clone()
4988                .eq(lit(0))
4989                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
4990
4991            assert_eq!(simplify(expr.clone()), expr);
4992        }
4993
4994        {
4995            let expr = col("column1")
4996                .eq(lit(2))
4997                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
4998
4999            assert_eq!(simplify(expr), col("column1").eq(lit(2)));
5000        }
5001
5002        {
5003            let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col(
5004                "column1",
5005            )
5006            .eq(lit(2))
5007            .and(rand.clone().eq(lit(0))));
5008
5009            assert_eq!(
5010                simplify(expr),
5011                col("column1")
5012                    .eq(lit(2))
5013                    .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0))))
5014            );
5015        }
5016    }
5017
5018    #[test]
5019    fn simplify_fixed_size_binary_eq_lit() {
5020        let bytes = [1u8, 2, 3].as_slice();
5021
5022        // The expression starts simple.
5023        let expr = col("c5").eq(lit(bytes));
5024
5025        // The type coercer introduces a cast.
5026        let coerced = coerce(expr.clone());
5027        let schema = expr_test_schema();
5028        assert_eq!(
5029            coerced,
5030            col("c5")
5031                .cast_to(&DataType::Binary, schema.as_ref())
5032                .unwrap()
5033                .eq(lit(bytes))
5034        );
5035
5036        // The simplifier removes the cast.
5037        assert_eq!(
5038            simplify(coerced),
5039            col("c5").eq(Expr::Literal(
5040                ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec()),),
5041                None
5042            ))
5043        );
5044    }
5045
5046    #[test]
5047    fn simplify_cast_literal() {
5048        // Test that CAST(literal) expressions are evaluated at plan time
5049
5050        // CAST(123 AS Int64) should become 123i64
5051        let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
5052        let expected = lit(123i64);
5053        assert_eq!(simplify(expr), expected);
5054
5055        // CAST(1761630189642 AS Timestamp(Nanosecond, Some("+00:00")))
5056        // Integer to timestamp cast
5057        let expr = Expr::Cast(Cast::new(
5058            Box::new(lit(1761630189642i64)),
5059            DataType::Timestamp(
5060                arrow::datatypes::TimeUnit::Nanosecond,
5061                Some("+00:00".into()),
5062            ),
5063        ));
5064        // Should evaluate to a timestamp literal
5065        let result = simplify(expr);
5066        match result {
5067            Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5068                assert_eq!(val, 1761630189642i64);
5069                assert_eq!(tz.as_deref(), Some("+00:00"));
5070            }
5071            other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5072        }
5073
5074        // Test CAST of invalid string to timestamp - should return an error at plan time
5075        // This represents the case from the issue: CAST(Utf8("1761630189642") AS Timestamp)
5076        // "1761630189642" is NOT a valid timestamp string format
5077        let expr = Expr::Cast(Cast::new(
5078            Box::new(lit("1761630189642")),
5079            DataType::Timestamp(
5080                arrow::datatypes::TimeUnit::Nanosecond,
5081                Some("+00:00".into()),
5082            ),
5083        ));
5084
5085        // The simplification should now fail with an error at plan time
5086        let schema = test_schema();
5087        let props = ExecutionProps::new();
5088        let simplifier =
5089            ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema));
5090        let result = simplifier.simplify(expr);
5091        assert!(result.is_err(), "Expected error for invalid cast");
5092        let err_msg = result.unwrap_err().to_string();
5093        assert_contains!(err_msg, "Error parsing timestamp");
5094    }
5095
5096    fn if_not_null(expr: Expr, then: bool) -> Expr {
5097        Expr::Case(Case {
5098            expr: Some(expr.is_not_null().into()),
5099            when_then_expr: vec![(lit(true).into(), lit(then).into())],
5100            else_expr: None,
5101        })
5102    }
5103}