datafusion_expr/
expr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Logical Expressions: [`Expr`]
19
20use std::cmp::Ordering;
21use std::collections::HashSet;
22use std::fmt::{self, Display, Formatter, Write};
23use std::hash::{Hash, Hasher};
24use std::mem;
25use std::sync::Arc;
26
27use crate::expr_fn::binary_expr;
28use crate::function::WindowFunctionSimplification;
29use crate::logical_plan::Subquery;
30use crate::{AggregateUDF, Volatility};
31use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
32
33use arrow::datatypes::{DataType, Field, FieldRef};
34use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
35use datafusion_common::tree_node::{
36    Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
37};
38use datafusion_common::{
39    Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference,
40};
41use datafusion_functions_window_common::field::WindowUDFFieldArgs;
42#[cfg(feature = "sql")]
43use sqlparser::ast::{
44    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
45    RenameSelectItem, ReplaceSelectElement,
46};
47
48// Moved in 51.0.0 to datafusion_common
49pub use datafusion_common::metadata::FieldMetadata;
50use datafusion_common::metadata::ScalarAndMetadata;
51
/// Null-treatment option, displayed as `IGNORE NULLS` or `RESPECT NULLS`.
///
/// This mirrors `sqlparser::ast::NullTreatment` but we need our own variant
/// for when the `sql` feature is disabled.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum NullTreatment {
    /// Displayed as `IGNORE NULLS`
    IgnoreNulls,
    /// Displayed as `RESPECT NULLS`
    RespectNulls,
}
59
60impl Display for NullTreatment {
61    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
62        f.write_str(match self {
63            NullTreatment::IgnoreNulls => "IGNORE NULLS",
64            NullTreatment::RespectNulls => "RESPECT NULLS",
65        })
66    }
67}
68
/// Converts the `sqlparser` null treatment into this crate's [`NullTreatment`].
#[cfg(feature = "sql")]
impl From<sqlparser::ast::NullTreatment> for NullTreatment {
    fn from(value: sqlparser::ast::NullTreatment) -> Self {
        use sqlparser::ast::NullTreatment as SqlNullTreatment;

        match value {
            SqlNullTreatment::IgnoreNulls => NullTreatment::IgnoreNulls,
            SqlNullTreatment::RespectNulls => NullTreatment::RespectNulls,
        }
    }
}
78
79/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
80///
81/// For example the expression `A + 1` will be represented as
82///
83///```text
84///  BinaryExpr {
85///    left: Expr::Column("A"),
86///    op: Operator::Plus,
87///    right: Expr::Literal(ScalarValue::Int32(Some(1)), None)
88/// }
89/// ```
90///
91/// # Creating Expressions
92///
93/// `Expr`s can be created directly, but it is often easier and less verbose to
94/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
96///
97/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
98///
99/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
100///
101/// # Printing Expressions
102///
103/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
104/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
105///
106/// If you need  SQL to pass to other systems, consider using [`Unparser`].
107///
108/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
109///
110/// # Schema Access
111///
112/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
113/// of an `Expr`.
114///
115/// # Visiting and Rewriting `Expr`s
116///
117/// The `Expr` struct implements the [`TreeNode`] trait for walking and
118/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
119/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
120/// the examples below and [`TreeNode`] for more information.
121///
122/// # Examples: Creating and Using `Expr`s
123///
124/// ## Column References and Literals
125///
126/// [`Expr::Column`] refer to the values of columns and are often created with
127/// the [`col`] function. For example to create an expression `c1` referring to
128/// column named "c1":
129///
130/// [`col`]: crate::expr_fn::col
131///
132/// ```
133/// # use datafusion_common::Column;
134/// # use datafusion_expr::{lit, col, Expr};
135/// let expr = col("c1");
136/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
137/// ```
138///
139/// [`Expr::Literal`] refer to literal, or constant, values. These are created
140/// with the [`lit`] function. For example to create an expression `42`:
141///
142/// [`lit`]: crate::lit
143///
144/// ```
145/// # use datafusion_common::{Column, ScalarValue};
146/// # use datafusion_expr::{lit, col, Expr};
147/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
148/// let expr = lit(42i64);
149/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
150/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None));
151/// // To make a (typed) NULL:
152/// let expr = Expr::Literal(ScalarValue::Int64(None), None);
153/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
154/// let expr = lit(ScalarValue::Null);
155/// ```
156///
157/// ## Binary Expressions
158///
159/// Exprs implement traits that allow easy to understand construction of more
160/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
161/// "c2" together
162///
163/// ```
164/// # use datafusion_expr::{lit, col, Operator, Expr};
165/// // Use the `+` operator to add two columns together
166/// let expr = col("c1") + col("c2");
167/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
168/// if let Expr::BinaryExpr(binary_expr) = expr {
169///     assert_eq!(*binary_expr.left, col("c1"));
170///     assert_eq!(*binary_expr.right, col("c2"));
171///     assert_eq!(binary_expr.op, Operator::Plus);
172/// }
173/// ```
174///
/// The expression `c1 = 42` compares the value in column "c1" to the
/// literal value `42`:
177///
178/// ```
179/// # use datafusion_common::ScalarValue;
180/// # use datafusion_expr::{lit, col, Operator, Expr};
181/// let expr = col("c1").eq(lit(42_i32));
182/// assert!(matches!(expr, Expr::BinaryExpr { .. }));
183/// if let Expr::BinaryExpr(binary_expr) = expr {
184///     assert_eq!(*binary_expr.left, col("c1"));
185///     let scalar = ScalarValue::Int32(Some(42));
186///     assert_eq!(*binary_expr.right, Expr::Literal(scalar, None));
187///     assert_eq!(binary_expr.op, Operator::Eq);
188/// }
189/// ```
190///
191/// Here is how to implement the equivalent of `SELECT *` to select all
192/// [`Expr::Column`] from a [`DFSchema`]'s columns:
193///
194/// ```
195/// # use arrow::datatypes::{DataType, Field, Schema};
196/// # use datafusion_common::{DFSchema, Column};
197/// # use datafusion_expr::Expr;
198/// // Create a schema c1(int, c2 float)
199/// let arrow_schema = Schema::new(vec![
200///     Field::new("c1", DataType::Int32, false),
201///     Field::new("c2", DataType::Float64, false),
202/// ]);
/// // DFSchema is an Arrow schema with optional relation name
204/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
205///
206/// // Form Vec<Expr> with an expression for each column in the schema
207/// let exprs: Vec<_> = df_schema.iter().map(Expr::from).collect();
208///
209/// assert_eq!(
210///     exprs,
211///     vec![
212///         Expr::from(Column::from_qualified_name("t1.c1")),
213///         Expr::from(Column::from_qualified_name("t1.c2")),
214///     ]
215/// );
216/// ```
217///
218/// # Examples: Displaying `Exprs`
219///
/// There are three ways to print an `Expr` depending on the use case.
221///
222/// ## Use `Debug` trait
223///
224/// Following Rust conventions, the `Debug` implementation prints out the
225/// internal structure of the expression, which is useful for debugging.
226///
227/// ```
228/// # use datafusion_expr::{lit, col};
229/// let expr = col("c1") + lit(42);
230/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })");
231/// ```
232///
233/// ## Use the `Display` trait  (detailed expression)
234///
235/// The `Display` implementation prints out the expression in a SQL-like form,
236/// but has additional details such as the data type of literals. This is useful
237/// for understanding the expression in more detail and is used for the low level
238/// [`ExplainFormat::Indent`] explain plan format.
239///
240/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
241///
242/// ```
243/// # use datafusion_expr::{lit, col};
244/// let expr = col("c1") + lit(42);
245/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
246/// ```
247///
248/// ## Use [`Self::human_display`] (human readable)
249///
250/// [`Self::human_display`]  prints out the expression in a SQL-like form, optimized
251/// for human consumption by end users. It is used for the
252/// [`ExplainFormat::Tree`] explain plan format.
253///
254/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
255///
256///```
257/// # use datafusion_expr::{lit, col};
258/// let expr = col("c1") + lit(42);
259/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
260/// ```
261///
262/// # Examples: Visiting and Rewriting `Expr`s
263///
264/// Here is an example that finds all literals in an `Expr` tree:
265/// ```
266/// # use std::collections::{HashSet};
267/// use datafusion_common::ScalarValue;
268/// # use datafusion_expr::{col, Expr, lit};
269/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
270/// // Expression a = 5 AND b = 6
271/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
/// // find all literals in a HashSet
273/// let mut scalars = HashSet::new();
274/// // apply recursively visits all nodes in the expression tree
275/// expr.apply(|e| {
276///     if let Expr::Literal(scalar, _) = e {
277///         scalars.insert(scalar);
278///     }
279///     // The return value controls whether to continue visiting the tree
280///     Ok(TreeNodeRecursion::Continue)
281/// })
282/// .unwrap();
283/// // All subtrees have been visited and literals found
284/// assert_eq!(scalars.len(), 2);
285/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
286/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
287/// ```
288///
/// Rewrite an expression, replacing references to column "a" with
/// the literal `42`:
291///
292///  ```
293/// # use datafusion_common::tree_node::{Transformed, TreeNode};
294/// # use datafusion_expr::{col, Expr, lit};
295/// // expression a = 5 AND b = 6
296/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
297/// // rewrite all references to column "a" to the literal 42
298/// let rewritten = expr.transform(|e| {
299///   if let Expr::Column(c) = &e {
300///     if &c.name == "a" {
301///       // return Transformed::yes to indicate the node was changed
302///       return Ok(Transformed::yes(lit(42)))
303///     }
304///   }
305///   // return Transformed::no to indicate the node was not changed
306///   Ok(Transformed::no(e))
307/// }).unwrap();
308/// // The expression has been rewritten
309/// assert!(rewritten.transformed);
310/// // to 42 = 5 AND b = 6
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
/// ```
#[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)]
pub enum Expr {
    /// An expression with a specific name.
    Alias(Alias),
    /// A named reference to a qualified field in a schema.
    Column(Column),
    /// A named reference to a variable in a registry.
    ScalarVariable(DataType, Vec<String>),
    /// A constant value along with associated [`FieldMetadata`].
    Literal(ScalarValue, Option<FieldMetadata>),
    /// A binary expression such as "age > 21"
    BinaryExpr(BinaryExpr),
    /// LIKE expression
    Like(Like),
    /// LIKE expression that uses regular expressions
    SimilarTo(Like),
    /// Negation of an expression. The expression's type must be a boolean to make sense.
    Not(Box<Expr>),
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
    IsNotNull(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsNull(Box<Expr>),
    /// True if argument is true, false otherwise. This expression itself is never NULL.
    IsTrue(Box<Expr>),
    /// True if argument is false, false otherwise. This expression itself is never NULL.
    IsFalse(Box<Expr>),
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
    IsUnknown(Box<Expr>),
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
    IsNotTrue(Box<Expr>),
    /// True if argument is TRUE OR NULL, false otherwise. This expression itself is never NULL.
    IsNotFalse(Box<Expr>),
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
    IsNotUnknown(Box<Expr>),
    /// Arithmetic negation of an expression, the operand must be of a signed numeric data type
    Negative(Box<Expr>),
    /// Whether an expression is between a given range.
    Between(Between),
    /// A CASE expression (see docs on [`Case`])
    Case(Case),
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    Cast(Cast),
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
    /// This expression is guaranteed to have a fixed type.
    TryCast(TryCast),
    /// Call a scalar function with a set of arguments.
    ScalarFunction(ScalarFunction),
    /// Calls an aggregate function with arguments, and optional
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
    ///
    /// See also [`ExprFunctionExt`] to set these fields.
    ///
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
    AggregateFunction(AggregateFunction),
    /// Call a window function with a set of arguments.
    WindowFunction(Box<WindowFunction>),
    /// Returns whether the list contains the expr value.
    InList(InList),
    /// EXISTS subquery
    Exists(Exists),
    /// IN subquery
    InSubquery(InSubquery),
    /// Scalar subquery
    ScalarSubquery(Subquery),
    /// Represents a reference to all available fields in a specific schema,
    /// with an optional (schema) qualifier.
    ///
    /// This expr has to be resolved to a list of columns before translating logical
    /// plan into physical plan.
    #[deprecated(
        since = "46.0.0",
        note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765"
    )]
    Wildcard {
        qualifier: Option<TableReference>,
        options: Box<WildcardOptions>,
    },
    /// List of grouping set expressions. Only valid in the context of an aggregate
    /// GROUP BY expression list
    GroupingSet(GroupingSet),
    /// A placeholder for parameters in a prepared statement
    /// (e.g. `$foo` or `$1`)
    Placeholder(Placeholder),
    /// A placeholder which holds a reference to a qualified field
    /// in the outer query, used for correlated subqueries.
    OuterReferenceColumn(FieldRef, Column),
    /// Unnest expression
    Unnest(Unnest),
    /// Lambda expression, valid only as a scalar function argument.
    ///
    /// Note that it has its own scoped schema, different from the plan schema,
    /// that can be constructed with `ScalarUDF::arguments_schemas` and variants.
    Lambda(Lambda),
}
406
407impl Default for Expr {
408    fn default() -> Self {
409        Expr::Literal(ScalarValue::Null, None)
410    }
411}
412
413impl AsRef<Expr> for Expr {
414    fn as_ref(&self) -> &Expr {
415        self
416    }
417}
418
419/// Create an [`Expr`] from a [`Column`]
420impl From<Column> for Expr {
421    fn from(value: Column) -> Self {
422        Expr::Column(value)
423    }
424}
425
426/// Create an [`Expr`] from a [`WindowFunction`]
427impl From<WindowFunction> for Expr {
428    fn from(value: WindowFunction) -> Self {
429        Expr::WindowFunction(Box::new(value))
430    }
431}
432
433/// Create an [`Expr`] from an [`ScalarAndMetadata`]
434impl From<ScalarAndMetadata> for Expr {
435    fn from(value: ScalarAndMetadata) -> Self {
436        let (value, metadata) = value.into_inner();
437        Expr::Literal(value, metadata)
438    }
439}
440
441/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
442/// useful for creating [`Expr`] from a [`DFSchema`].
443///
444/// See example on [`Expr`]
445impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
446    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
447        Expr::from(Column::from(value))
448    }
449}
450
451impl<'a> TreeNodeContainer<'a, Self> for Expr {
452    fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
453        &'a self,
454        mut f: F,
455    ) -> Result<TreeNodeRecursion> {
456        f(self)
457    }
458
459    fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>(
460        self,
461        mut f: F,
462    ) -> Result<Transformed<Self>> {
463        f(self)
464    }
465}
466
/// The metadata used in [`Field::metadata`].
///
/// This represents the metadata associated with an Arrow [`Field`]. The metadata consists of
/// key-value pairs, stored as a standard-library `HashMap` from `String` keys to `String` values.
///
/// # Common Use Cases
///
/// Field metadata is commonly used to store:
/// - Default values for columns when data is missing
/// - Column descriptions or documentation
/// - Data lineage information
/// - Custom application-specific annotations
/// - Encoding hints or display formatting preferences
///
/// # Example: Storing Default Values
///
/// A practical example of using field metadata is storing default values for columns
/// that may be missing in the physical data but present in the logical schema.
/// See the [default_column_values.rs] example implementation.
///
/// [default_column_values.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/default_column_values.rs
pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;
488
489/// Intersects multiple metadata instances for UNION operations.
490///
491/// This function implements the intersection strategy used by UNION operations,
492/// where only metadata keys that exist in ALL inputs with identical values
493/// are preserved in the result.
494///
495/// # Union Metadata Behavior
496///
497/// Union operations require consistent metadata across all branches:
498/// - Only metadata keys present in ALL union branches are kept
499/// - For each kept key, the value must be identical across all branches
500/// - If a key has different values across branches, it is excluded from the result
501/// - If any input has no metadata, the result will be empty
502///
503/// # Arguments
504///
505/// * `metadatas` - An iterator of `SchemaFieldMetadata` instances to intersect
506///
507/// # Returns
508///
509/// A new `SchemaFieldMetadata` containing only the intersected metadata
510pub fn intersect_metadata_for_union<'a>(
511    metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
512) -> SchemaFieldMetadata {
513    let mut metadatas = metadatas.into_iter();
514    let Some(mut intersected) = metadatas.next().cloned() else {
515        return Default::default();
516    };
517
518    for metadata in metadatas {
519        // Only keep keys that exist in both with the same value
520        intersected.retain(|k, v| metadata.get(k) == Some(v));
521    }
522
523    intersected
524}
525
/// UNNEST expression.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Unnest {
    /// The boxed expression wrapped by this `UNNEST`
    pub expr: Box<Expr>,
}
531
532impl Unnest {
533    /// Create a new Unnest expression.
534    pub fn new(expr: Expr) -> Self {
535        Self {
536            expr: Box::new(expr),
537        }
538    }
539
540    /// Create a new Unnest expression.
541    pub fn new_boxed(boxed: Box<Expr>) -> Self {
542        Self { expr: boxed }
543    }
544}
545
/// Alias expression
///
/// Note that `Hash` and `PartialOrd` are implemented by hand for this type
/// rather than derived.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Alias {
    /// The expression being aliased
    pub expr: Box<Expr>,
    /// Optional schema/field qualifier of the alias
    pub relation: Option<TableReference>,
    /// The alias name
    pub name: String,
    /// Optional metadata attached to the alias; not fed into the `Hash` implementation
    pub metadata: Option<FieldMetadata>,
}
554
555impl Hash for Alias {
556    fn hash<H: Hasher>(&self, state: &mut H) {
557        self.expr.hash(state);
558        self.relation.hash(state);
559        self.name.hash(state);
560    }
561}
562
563impl PartialOrd for Alias {
564    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
565        let cmp = self.expr.partial_cmp(&other.expr);
566        let Some(Ordering::Equal) = cmp else {
567            return cmp;
568        };
569        let cmp = self.relation.partial_cmp(&other.relation);
570        let Some(Ordering::Equal) = cmp else {
571            return cmp;
572        };
573        self.name
574            .partial_cmp(&other.name)
575            // TODO (https://github.com/apache/datafusion/issues/17477) avoid recomparing all fields
576            .filter(|cmp| *cmp != Ordering::Equal || self == other)
577    }
578}
579
580impl Alias {
581    /// Create an alias with an optional schema/field qualifier.
582    pub fn new(
583        expr: Expr,
584        relation: Option<impl Into<TableReference>>,
585        name: impl Into<String>,
586    ) -> Self {
587        Self {
588            expr: Box::new(expr),
589            relation: relation.map(|r| r.into()),
590            name: name.into(),
591            metadata: None,
592        }
593    }
594
595    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
596        self.metadata = metadata;
597        self
598    }
599}
600
/// Binary expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct BinaryExpr {
    /// Left-hand side of the expression
    pub left: Box<Expr>,
    /// The operator applied to the two sides (for example `Operator::Plus` or `Operator::Eq`)
    pub op: Operator,
    /// Right-hand side of the expression
    pub right: Box<Expr>,
}
611
612impl BinaryExpr {
613    /// Create a new binary expression
614    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
615        Self { left, op, right }
616    }
617}
618
619impl Display for BinaryExpr {
620    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
621        // Put parentheses around child binary expressions so that we can see the difference
622        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
623        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
624        // equivalent and the parentheses are not necessary.
625
626        fn write_child(
627            f: &mut Formatter<'_>,
628            expr: &Expr,
629            precedence: u8,
630        ) -> fmt::Result {
631            match expr {
632                Expr::BinaryExpr(child) => {
633                    let p = child.op.precedence();
634                    if p == 0 || p < precedence {
635                        write!(f, "({child})")?;
636                    } else {
637                        write!(f, "{child}")?;
638                    }
639                }
640                _ => write!(f, "{expr}")?,
641            }
642            Ok(())
643        }
644
645        let precedence = self.op.precedence();
646        write_child(f, self.left.as_ref(), precedence)?;
647        write!(f, " {} ", self.op)?;
648        write_child(f, self.right.as_ref(), precedence)
649    }
650}
651
/// CASE expression
///
/// The CASE expression is similar to a series of nested if/else and there are two forms that
/// can be used. The first form consists of a series of boolean "when" expressions with
/// corresponding "then" expressions, and an optional "else" expression.
///
/// ```text
/// CASE WHEN condition THEN result
///      [WHEN ...]
///      [ELSE result]
/// END
/// ```
///
/// The second form uses a base expression and then a series of "when" clauses that match on a
/// literal value.
///
/// ```text
/// CASE expression
///     WHEN value THEN result
///     [WHEN ...]
///     [ELSE result]
/// END
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct Case {
    /// Optional base expression that can be compared to literal values in the "when" expressions
    /// (`None` for the first form shown above)
    pub expr: Option<Box<Expr>>,
    /// One or more when/then expressions, stored as `(when, then)` pairs
    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
    /// Optional "else" expression
    pub else_expr: Option<Box<Expr>>,
}
684
685impl Case {
686    /// Create a new Case expression
687    pub fn new(
688        expr: Option<Box<Expr>>,
689        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
690        else_expr: Option<Box<Expr>>,
691    ) -> Self {
692        Self {
693            expr,
694            when_then_expr,
695            else_expr,
696        }
697    }
698}
699
/// LIKE expression
///
/// The same struct is also the payload of `Expr::SimilarTo`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Like {
    /// Whether the expression is negated
    pub negated: bool,
    /// The expression being matched
    pub expr: Box<Expr>,
    /// The pattern expression to match against
    pub pattern: Box<Expr>,
    /// Optional escape character
    pub escape_char: Option<char>,
    /// Whether to ignore case on comparing
    pub case_insensitive: bool,
}
710
711impl Like {
712    /// Create a new Like expression
713    pub fn new(
714        negated: bool,
715        expr: Box<Expr>,
716        pattern: Box<Expr>,
717        escape_char: Option<char>,
718        case_insensitive: bool,
719    ) -> Self {
720        Self {
721            negated,
722            expr,
723            pattern,
724            escape_char,
725            case_insensitive,
726        }
727    }
728}
729
/// BETWEEN expression
///
/// Holds the value being tested together with the low and high ends of the range.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Between {
    /// The value to compare
    pub expr: Box<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
    /// The low end of the range
    pub low: Box<Expr>,
    /// The high end of the range
    pub high: Box<Expr>,
}
742
743impl Between {
744    /// Create a new Between expression
745    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
746        Self {
747            expr,
748            negated,
749            low,
750            high,
751        }
752    }
753}
754
/// Invoke a [`ScalarUDF`] with a set of arguments
///
/// [`ScalarUDF`]: crate::ScalarUDF
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct ScalarFunction {
    /// The function to invoke, shared behind an `Arc`
    pub func: Arc<crate::ScalarUDF>,
    /// List of expressions to feed to the function as arguments
    pub args: Vec<Expr>,
}
765
766impl ScalarFunction {
767    // return the Function's name
768    pub fn name(&self) -> &str {
769        self.func.name()
770    }
771}
772
773impl ScalarFunction {
774    /// Create a new `ScalarFunction` from a [`ScalarUDF`]
775    ///
776    /// [`ScalarUDF`]: crate::ScalarUDF
777    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
778        Self { func: udf, args }
779    }
780}
781
/// Access a sub field of a nested type, such as a struct field or a list element
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum GetFieldAccess {
    /// Named field, for example `struct["name"]`
    NamedStructField {
        /// The field name, held as a [`ScalarValue`]
        name: ScalarValue,
    },
    /// Single list index, for example: `list[i]`
    ListIndex {
        /// Expression producing the index
        key: Box<Expr>,
    },
    /// List stride, for example `list[i:j:k]`
    ListRange {
        /// Expression for the start of the range
        start: Box<Expr>,
        /// Expression for the end of the range
        stop: Box<Expr>,
        /// Expression for the stride of the range
        stride: Box<Expr>,
    },
}
796
/// Cast expression
///
/// Pairs an expression with the [`DataType`] it is cast to.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Cast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}
805
806impl Cast {
807    /// Create a new Cast expression
808    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
809        Self { expr, data_type }
810    }
811}
812
/// TryCast Expression
///
/// Pairs an expression with the [`DataType`] it is cast to.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct TryCast {
    /// The expression being cast
    pub expr: Box<Expr>,
    /// The `DataType` the expression will yield
    pub data_type: DataType,
}
821
822impl TryCast {
823    /// Create a new TryCast expression
824    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
825        Self { expr, data_type }
826    }
827}
828
/// SORT expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Sort {
    /// The expression to sort on
    pub expr: Expr,
    /// The direction of the sort (`true` is displayed as `ASC`, `false` as `DESC`)
    pub asc: bool,
    /// Whether to put Nulls before all other data values
    /// (`true` is displayed as `NULLS FIRST`, `false` as `NULLS LAST`)
    pub nulls_first: bool,
}
839
840impl Sort {
841    /// Create a new Sort expression
842    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
843        Self {
844            expr,
845            asc,
846            nulls_first,
847        }
848    }
849
850    /// Create a new Sort expression with the opposite sort direction
851    pub fn reverse(&self) -> Self {
852        Self {
853            expr: self.expr.clone(),
854            asc: !self.asc,
855            nulls_first: !self.nulls_first,
856        }
857    }
858
859    /// Replaces the Sort expressions with `expr`
860    pub fn with_expr(&self, expr: Expr) -> Self {
861        Self {
862            expr,
863            asc: self.asc,
864            nulls_first: self.nulls_first,
865        }
866    }
867}
868
869impl Display for Sort {
870    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
871        write!(f, "{}", self.expr)?;
872        if self.asc {
873            write!(f, " ASC")?;
874        } else {
875            write!(f, " DESC")?;
876        }
877        if self.nulls_first {
878            write!(f, " NULLS FIRST")?;
879        } else {
880            write!(f, " NULLS LAST")?;
881        }
882        Ok(())
883    }
884}
885
886impl<'a> TreeNodeContainer<'a, Expr> for Sort {
887    fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>(
888        &'a self,
889        f: F,
890    ) -> Result<TreeNodeRecursion> {
891        self.expr.apply_elements(f)
892    }
893
894    fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
895        self,
896        f: F,
897    ) -> Result<Transformed<Self>> {
898        self.expr
899            .map_elements(f)?
900            .map_data(|expr| Ok(Self { expr, ..self }))
901    }
902}
903
/// Aggregate function
///
/// Combines the aggregate UDF to call with the parameters of the call
/// (see [`AggregateFunctionParams`]).
///
/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunction {
    /// The user-defined aggregate function to call (shared via `Arc`)
    pub func: Arc<AggregateUDF>,
    /// Arguments and modifiers of this particular call
    pub params: AggregateFunctionParams,
}
915
/// The arguments and modifiers attached to an [`AggregateFunction`] call.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct AggregateFunctionParams {
    /// List of expressions to feed to the function as arguments
    pub args: Vec<Expr>,
    /// Whether this is a DISTINCT aggregation or not
    pub distinct: bool,
    /// Optional filter
    pub filter: Option<Box<Expr>>,
    /// Optional ordering
    pub order_by: Vec<Sort>,
    /// Optional NULL treatment; `None` when no treatment was specified
    pub null_treatment: Option<NullTreatment>,
}
927
928impl AggregateFunction {
929    /// Create a new AggregateFunction expression with a user-defined function (UDF)
930    pub fn new_udf(
931        func: Arc<AggregateUDF>,
932        args: Vec<Expr>,
933        distinct: bool,
934        filter: Option<Box<Expr>>,
935        order_by: Vec<Sort>,
936        null_treatment: Option<NullTreatment>,
937    ) -> Self {
938        Self {
939            func,
940            params: AggregateFunctionParams {
941                args,
942                distinct,
943                filter,
944                order_by,
945                null_treatment,
946            },
947        }
948    }
949}
950
/// A function used as a SQL window function
///
/// In SQL, you can use:
/// - Actual window functions ([`WindowUDF`])
/// - Normal aggregate functions ([`AggregateUDF`])
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum WindowFunctionDefinition {
    /// A user defined aggregate function
    AggregateUDF(Arc<AggregateUDF>),
    /// A user defined window function
    WindowUDF(Arc<WindowUDF>),
}
963
964impl WindowFunctionDefinition {
965    /// Returns the datatype of the window function
966    pub fn return_field(
967        &self,
968        input_expr_fields: &[FieldRef],
969        display_name: &str,
970    ) -> Result<FieldRef> {
971        match self {
972            WindowFunctionDefinition::AggregateUDF(fun) => {
973                fun.return_field(input_expr_fields)
974            }
975            WindowFunctionDefinition::WindowUDF(fun) => {
976                fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name))
977            }
978        }
979    }
980
981    /// The signatures supported by the function `fun`.
982    pub fn signature(&self) -> Signature {
983        match self {
984            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
985            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
986        }
987    }
988
989    /// Function's name for display
990    pub fn name(&self) -> &str {
991        match self {
992            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
993            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
994        }
995    }
996
997    /// Return the inner window simplification function, if any
998    ///
999    /// See [`WindowFunctionSimplification`] for more information
1000    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1001        match self {
1002            WindowFunctionDefinition::AggregateUDF(_) => None,
1003            WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(),
1004        }
1005    }
1006}
1007
1008impl Display for WindowFunctionDefinition {
1009    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1010        match self {
1011            WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f),
1012            WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f),
1013        }
1014    }
1015}
1016
1017impl From<Arc<AggregateUDF>> for WindowFunctionDefinition {
1018    fn from(value: Arc<AggregateUDF>) -> Self {
1019        Self::AggregateUDF(value)
1020    }
1021}
1022
1023impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
1024    fn from(value: Arc<WindowUDF>) -> Self {
1025        Self::WindowUDF(value)
1026    }
1027}
1028
/// Window function
///
/// Holds the actual function to call ([`WindowFunctionDefinition`]) as well
/// as its arguments (`args`) and the contents of the `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to call: either an aggregate UDF or a window UDF
    pub fun: WindowFunctionDefinition,
    /// Arguments and `OVER` clause contents for this call
    pub params: WindowFunctionParams,
}
1047
/// The arguments and `OVER` clause contents attached to a [`WindowFunction`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// List of partition by expressions
    pub partition_by: Vec<Expr>,
    /// List of order by expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Optional filter expression (FILTER (WHERE ...))
    pub filter: Option<Box<Expr>>,
    /// Specifies how NULL values are treated: ignore or respect
    pub null_treatment: Option<NullTreatment>,
    /// Distinct flag
    pub distinct: bool,
}
1065
1066impl WindowFunction {
1067    /// Create a new Window expression with the specified argument an
1068    /// empty `OVER` clause
1069    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
1070        Self {
1071            fun: fun.into(),
1072            params: WindowFunctionParams {
1073                args,
1074                partition_by: Vec::default(),
1075                order_by: Vec::default(),
1076                window_frame: WindowFrame::new(None),
1077                filter: None,
1078                null_treatment: None,
1079                distinct: false,
1080            },
1081        }
1082    }
1083
1084    /// Return the inner window simplification function, if any
1085    ///
1086    /// See [`WindowFunctionSimplification`] for more information
1087    pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
1088        self.fun.simplify()
1089    }
1090}
1091
/// EXISTS expression
///
/// Wraps a subquery together with a flag recording whether the test is
/// negated (`NOT EXISTS`).
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Exists {
    /// Subquery that will produce a single column of data
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1100
1101impl Exists {
1102    // Create a new Exists expression.
1103    pub fn new(subquery: Subquery, negated: bool) -> Self {
1104        Self { subquery, negated }
1105    }
1106}
1107
/// InList expression
///
/// Represents `expr IN (list...)`, or `expr NOT IN (list...)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InList {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// The list of values to compare against
    pub list: Vec<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
}
1118
1119impl InList {
1120    /// Create a new InList expression
1121    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
1122        Self {
1123            expr,
1124            list,
1125            negated,
1126        }
1127    }
1128}
1129
/// IN subquery
///
/// Represents `expr IN (<subquery>)`, or `expr NOT IN (<subquery>)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InSubquery {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
1140
1141impl InSubquery {
1142    /// Create a new InSubquery expression
1143    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
1144        Self {
1145            expr,
1146            subquery,
1147            negated,
1148        }
1149    }
1150}
1151
/// Placeholder, representing bind parameter values such as `$1` or `$name`.
///
/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
/// or can be specified directly using `PREPARE` statements.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Placeholder {
    /// The identifier of the parameter, including the leading `$` (e.g., `"$1"` or `"$foo"`)
    pub id: String,
    /// The field (and therefore the type) the parameter will be filled in
    /// with, or `None` when it is not known yet
    pub field: Option<FieldRef>,
}
1163
1164impl Placeholder {
1165    /// Create a new Placeholder expression
1166    #[deprecated(since = "51.0.0", note = "Use new_with_field instead")]
1167    pub fn new(id: String, data_type: Option<DataType>) -> Self {
1168        Self {
1169            id,
1170            field: data_type.map(|dt| Arc::new(Field::new("", dt, true))),
1171        }
1172    }
1173
1174    /// Create a new Placeholder expression from a Field
1175    pub fn new_with_field(id: String, field: Option<FieldRef>) -> Self {
1176        Self { id, field }
1177    }
1178}
1179
/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
/// for Apache Spark definition.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum GroupingSet {
    /// Rollup grouping sets (`ROLLUP (...)`), holding the listed expressions
    Rollup(Vec<Expr>),
    /// Cube grouping sets (`CUBE (...)`), holding the listed expressions
    Cube(Vec<Expr>),
    /// User-defined grouping sets (`GROUPING SETS (...)`), one inner `Vec`
    /// per set
    GroupingSets(Vec<Vec<Expr>>),
}
1195
1196impl GroupingSet {
1197    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
1198    /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate
1199    /// the exprs in the underlying sets.
1200    pub fn distinct_expr(&self) -> Vec<&Expr> {
1201        match self {
1202            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
1203                exprs.iter().collect()
1204            }
1205            GroupingSet::GroupingSets(groups) => {
1206                let mut exprs: Vec<&Expr> = vec![];
1207                for exp in groups.iter().flatten() {
1208                    if !exprs.contains(&exp) {
1209                        exprs.push(exp);
1210                    }
1211                }
1212                exprs
1213            }
1214        }
1215    }
1216}
1217
/// Lambda expression.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Lambda {
    /// The lambda's parameters, as strings (presumably their names)
    pub params: Vec<String>,
    /// The expression forming the body of the lambda
    pub body: Box<Expr>,
}
1224
1225impl Lambda {
1226    /// Create a new lambda expression
1227    pub fn new(params: Vec<String>, body: Expr) -> Self {
1228        Self {
1229            params,
1230            body: Box::new(body),
1231        }
1232    }
1233}
1234
/// Stand-in for sqlparser's `IlikeSelectItem`, compiled only when the `sql`
/// feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct IlikeSelectItem {
    /// The pattern, printed between single quotes by `Display`
    pub pattern: String,
}
#[cfg(not(feature = "sql"))]
impl Display for IlikeSelectItem {
    /// Renders as `ILIKE '<pattern>'`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { pattern } = self;
        write!(f, "ILIKE '{pattern}'")
    }
}
/// Stand-in for sqlparser's `ExcludeSelectItem`, compiled only when the `sql`
/// feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub enum ExcludeSelectItem {
    /// A single column, displayed as `EXCLUDE <column>`
    Single(Ident),
    /// Several columns, displayed as `EXCLUDE (<c1>, <c2>, ...)`
    Multiple(Vec<Ident>),
}
1253#[cfg(not(feature = "sql"))]
1254impl Display for ExcludeSelectItem {
1255    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1256        write!(f, "EXCLUDE")?;
1257        match self {
1258            Self::Single(column) => {
1259                write!(f, " {column}")?;
1260            }
1261            Self::Multiple(columns) => {
1262                write!(f, " ({})", display_comma_separated(columns))?;
1263            }
1264        }
1265        Ok(())
1266    }
1267}
/// Stand-in for sqlparser's `ExceptSelectItem`, compiled only when the `sql`
/// feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct ExceptSelectItem {
    /// The first excepted identifier (always present)
    pub first_element: Ident,
    /// Any further excepted identifiers; may be empty
    pub additional_elements: Vec<Ident>,
}
1274#[cfg(not(feature = "sql"))]
1275impl Display for ExceptSelectItem {
1276    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1277        write!(f, "EXCEPT ")?;
1278        if self.additional_elements.is_empty() {
1279            write!(f, "({})", self.first_element)?;
1280        } else {
1281            write!(
1282                f,
1283                "({}, {})",
1284                self.first_element,
1285                display_comma_separated(&self.additional_elements)
1286            )?;
1287        }
1288        Ok(())
1289    }
1290}
1291
/// Renders every element of `slice` with its `Display` impl and joins the
/// results with `", "`.
///
/// An empty slice yields an empty string. This is the fallback used when the
/// `sql` feature is disabled.
#[cfg(not(feature = "sql"))]
pub fn display_comma_separated<T>(slice: &[T]) -> String
where
    T: Display,
{
    // `ToString` plus the standard slice `join` covers this without any
    // extra trait import.
    slice
        .iter()
        .map(ToString::to_string)
        .collect::<Vec<_>>()
        .join(", ")
}
1300
/// Stand-in for sqlparser's `RenameSelectItem`, compiled only when the `sql`
/// feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub enum RenameSelectItem {
    /// A single entry, displayed as `RENAME <entry>`
    Single(String),
    /// Several entries, displayed as `RENAME (<e1>, <e2>, ...)`
    Multiple(Vec<String>),
}
1307#[cfg(not(feature = "sql"))]
1308impl Display for RenameSelectItem {
1309    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1310        write!(f, "RENAME")?;
1311        match self {
1312            Self::Single(column) => {
1313                write!(f, " {column}")?;
1314            }
1315            Self::Multiple(columns) => {
1316                write!(f, " ({})", display_comma_separated(columns))?;
1317            }
1318        }
1319        Ok(())
1320    }
1321}
1322
/// Stand-in for sqlparser's `Ident`, compiled only when the `sql` feature is
/// disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct Ident {
    /// The value of the identifier without quotes.
    pub value: String,
    /// The starting quote if any. Valid quote characters are the single quote,
    /// double quote, backtick, and opening square bracket.
    pub quote_style: Option<char>,
    /// The span of the identifier in the original SQL string.
    pub span: String,
}
#[cfg(not(feature = "sql"))]
impl Display for Ident {
    /// Renders the identifier value between square brackets; `quote_style`
    /// and `span` do not affect the output.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { value, .. } = self;
        write!(f, "[{value}]")
    }
}
1340
/// Stand-in for sqlparser's `ReplaceSelectElement`, compiled only when the
/// `sql` feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct ReplaceSelectElement {
    /// The replacement expression, kept as text
    pub expr: String,
    /// The column the expression replaces
    pub column_name: Ident,
    /// Whether `Display` prints ` AS ` between the expression and the column
    pub as_keyword: bool,
}
1348#[cfg(not(feature = "sql"))]
1349impl Display for ReplaceSelectElement {
1350    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1351        if self.as_keyword {
1352            write!(f, "{} AS {}", self.expr, self.column_name)
1353        } else {
1354            write!(f, "{} {}", self.expr, self.column_name)
1355        }
1356    }
1357}
1358
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
///
/// Every option is optional; the `Default` value has none of them set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct WildcardOptions {
    /// `[ILIKE...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub ilike: Option<IlikeSelectItem>,
    /// `[EXCLUDE...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub exclude: Option<ExcludeSelectItem>,
    /// `[EXCEPT...]`.
    /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
    /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
    pub except: Option<ExceptSelectItem>,
    /// `[REPLACE]`
    /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
    /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub replace: Option<PlannedReplaceSelectItem>,
    /// `[RENAME ...]`.
    /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub rename: Option<RenameSelectItem>,
}
1381
1382impl WildcardOptions {
1383    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1384        WildcardOptions {
1385            ilike: self.ilike,
1386            exclude: self.exclude,
1387            except: self.except,
1388            replace: Some(replace),
1389            rename: self.rename,
1390        }
1391    }
1392}
1393
1394impl Display for WildcardOptions {
1395    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1396        if let Some(ilike) = &self.ilike {
1397            write!(f, " {ilike}")?;
1398        }
1399        if let Some(exclude) = &self.exclude {
1400            write!(f, " {exclude}")?;
1401        }
1402        if let Some(except) = &self.except {
1403            write!(f, " {except}")?;
1404        }
1405        if let Some(replace) = &self.replace {
1406            write!(f, " {replace}")?;
1407        }
1408        if let Some(rename) = &self.rename {
1409            write!(f, " {rename}")?;
1410        }
1411        Ok(())
1412    }
1413}
1414
/// The planned expressions for `REPLACE`
///
/// Keeps the original AST nodes next to the expressions planned from them.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct PlannedReplaceSelectItem {
    /// The original ast nodes
    pub items: Vec<ReplaceSelectElement>,
    /// The expressions planned from the ast nodes. They will be used when expanding the wildcard.
    pub planned_expressions: Vec<Expr>,
}
1423
1424impl Display for PlannedReplaceSelectItem {
1425    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1426        write!(f, "REPLACE")?;
1427        write!(f, " ({})", display_comma_separated(&self.items))?;
1428        Ok(())
1429    }
1430}
1431
1432impl PlannedReplaceSelectItem {
1433    pub fn items(&self) -> &[ReplaceSelectElement] {
1434        &self.items
1435    }
1436
1437    pub fn expressions(&self) -> &[Expr] {
1438        &self.planned_expressions
1439    }
1440}
1441
1442impl Expr {
    /// The name of the column (field) that this `Expr` will produce.
    ///
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
    /// [`Schema`] will have a field with this name.
    ///
    /// Note that the resulting string is subtly different from the `Display`
    /// representation for certain `Expr`. Some differences:
    ///
    /// 1. [`Expr::Alias`], which shows only the alias itself
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
    ///
    /// The returned value borrows `self` and produces the name when formatted.
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo").eq(lit(42));
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
    ///
    /// let expr = col("foo").alias("bar").eq(lit(11));
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
    /// ```
    ///
    /// [`Schema`]: arrow::datatypes::Schema
    pub fn schema_name(&self) -> impl Display + '_ {
        SchemaDisplay(self)
    }
1468
    /// Human readable display formatting for this expression.
    ///
    /// This function is primarily used in printing the explain tree output,
    /// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
    /// show how expressions are used in physical and logical plans. See
    /// [`Expr`] for other ways to format expressions.
    ///
    /// Note this format is intended for human consumption rather than SQL for
    /// other systems. If you need SQL to pass to other systems, consider using
    /// [`Unparser`].
    ///
    /// The returned value borrows `self` and produces the text when formatted.
    ///
    /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
    ///
    /// # Example
    /// ```
    /// # use datafusion_expr::{col, lit};
    /// let expr = col("foo") + lit(42);
    /// // For EXPLAIN output:
    /// // "foo + 42"
    /// println!("{}", expr.human_display());
    /// ```
    pub fn human_display(&self) -> impl Display + '_ {
        SqlDisplay(self)
    }
1493
1494    /// Returns the qualifier and the schema name of this expression.
1495    ///
1496    /// Used when the expression forms the output field of a certain plan.
1497    /// The result is the field's qualifier and field name in the plan's
1498    /// output schema. We can use this qualified name to reference the field.
1499    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1500        match self {
1501            Expr::Column(Column {
1502                relation,
1503                name,
1504                spans: _,
1505            }) => (relation.clone(), name.clone()),
1506            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1507            _ => (None, self.schema_name().to_string()),
1508        }
1509    }
1510
    /// Return String representation of the variant represented by `self`
    /// Useful for non-rust based bindings
    ///
    /// The returned name matches the variant identifier, with two exceptions:
    /// [`Expr::OuterReferenceColumn`] is reported as `"Outer"` and
    /// [`Expr::SimilarTo`] as `"RLike"`.
    pub fn variant_name(&self) -> &str {
        match self {
            Expr::AggregateFunction { .. } => "AggregateFunction",
            Expr::Alias(..) => "Alias",
            Expr::Between { .. } => "Between",
            Expr::BinaryExpr { .. } => "BinaryExpr",
            Expr::Case { .. } => "Case",
            Expr::Cast { .. } => "Cast",
            Expr::Column(..) => "Column",
            Expr::OuterReferenceColumn(_, _) => "Outer",
            Expr::Exists { .. } => "Exists",
            Expr::GroupingSet(..) => "GroupingSet",
            Expr::InList { .. } => "InList",
            Expr::InSubquery(..) => "InSubquery",
            Expr::IsNotNull(..) => "IsNotNull",
            Expr::IsNull(..) => "IsNull",
            Expr::Like { .. } => "Like",
            Expr::SimilarTo { .. } => "RLike",
            Expr::IsTrue(..) => "IsTrue",
            Expr::IsFalse(..) => "IsFalse",
            Expr::IsUnknown(..) => "IsUnknown",
            Expr::IsNotTrue(..) => "IsNotTrue",
            Expr::IsNotFalse(..) => "IsNotFalse",
            Expr::IsNotUnknown(..) => "IsNotUnknown",
            Expr::Literal(..) => "Literal",
            Expr::Negative(..) => "Negative",
            Expr::Not(..) => "Not",
            Expr::Placeholder(_) => "Placeholder",
            Expr::ScalarFunction(..) => "ScalarFunction",
            Expr::ScalarSubquery { .. } => "ScalarSubquery",
            Expr::ScalarVariable(..) => "ScalarVariable",
            Expr::TryCast { .. } => "TryCast",
            Expr::WindowFunction { .. } => "WindowFunction",
            // `Wildcard` is deprecated but still has to be matched here.
            #[expect(deprecated)]
            Expr::Wildcard { .. } => "Wildcard",
            Expr::Unnest { .. } => "Unnest",
            Expr::Lambda { .. } => "Lambda",
        }
    }
1552
1553    /// Return `self == other`
1554    pub fn eq(self, other: Expr) -> Expr {
1555        binary_expr(self, Operator::Eq, other)
1556    }
1557
1558    /// Return `self != other`
1559    pub fn not_eq(self, other: Expr) -> Expr {
1560        binary_expr(self, Operator::NotEq, other)
1561    }
1562
1563    /// Return `self > other`
1564    pub fn gt(self, other: Expr) -> Expr {
1565        binary_expr(self, Operator::Gt, other)
1566    }
1567
1568    /// Return `self >= other`
1569    pub fn gt_eq(self, other: Expr) -> Expr {
1570        binary_expr(self, Operator::GtEq, other)
1571    }
1572
1573    /// Return `self < other`
1574    pub fn lt(self, other: Expr) -> Expr {
1575        binary_expr(self, Operator::Lt, other)
1576    }
1577
1578    /// Return `self <= other`
1579    pub fn lt_eq(self, other: Expr) -> Expr {
1580        binary_expr(self, Operator::LtEq, other)
1581    }
1582
1583    /// Return `self && other`
1584    pub fn and(self, other: Expr) -> Expr {
1585        binary_expr(self, Operator::And, other)
1586    }
1587
1588    /// Return `self || other`
1589    pub fn or(self, other: Expr) -> Expr {
1590        binary_expr(self, Operator::Or, other)
1591    }
1592
1593    /// Return `self LIKE other`
1594    pub fn like(self, other: Expr) -> Expr {
1595        Expr::Like(Like::new(
1596            false,
1597            Box::new(self),
1598            Box::new(other),
1599            None,
1600            false,
1601        ))
1602    }
1603
1604    /// Return `self NOT LIKE other`
1605    pub fn not_like(self, other: Expr) -> Expr {
1606        Expr::Like(Like::new(
1607            true,
1608            Box::new(self),
1609            Box::new(other),
1610            None,
1611            false,
1612        ))
1613    }
1614
1615    /// Return `self ILIKE other`
1616    pub fn ilike(self, other: Expr) -> Expr {
1617        Expr::Like(Like::new(
1618            false,
1619            Box::new(self),
1620            Box::new(other),
1621            None,
1622            true,
1623        ))
1624    }
1625
1626    /// Return `self NOT ILIKE other`
1627    pub fn not_ilike(self, other: Expr) -> Expr {
1628        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1629    }
1630
1631    /// Return the name to use for the specific Expr
1632    pub fn name_for_alias(&self) -> Result<String> {
1633        Ok(self.schema_name().to_string())
1634    }
1635
1636    /// Ensure `expr` has the name as `original_name` by adding an
1637    /// alias if necessary.
1638    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1639        let new_name = self.name_for_alias()?;
1640        if new_name == original_name {
1641            return Ok(self);
1642        }
1643
1644        Ok(self.alias(original_name))
1645    }
1646
1647    /// Return `self AS name` alias expression
1648    pub fn alias(self, name: impl Into<String>) -> Expr {
1649        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1650    }
1651
1652    /// Return `self AS name` alias expression with metadata
1653    ///
1654    /// The metadata will be attached to the Arrow Schema field when the expression
1655    /// is converted to a field via `Expr.to_field()`.
1656    ///
1657    /// # Example
1658    /// ```
1659    /// # use datafusion_expr::col;
1660    /// # use std::collections::HashMap;
1661    /// # use datafusion_common::metadata::FieldMetadata;
1662    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1663    /// let metadata = FieldMetadata::from(metadata);
1664    /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
1665    /// ```
1666    pub fn alias_with_metadata(
1667        self,
1668        name: impl Into<String>,
1669        metadata: Option<FieldMetadata>,
1670    ) -> Expr {
1671        Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
1672    }
1673
1674    /// Return `self AS name` alias expression with a specific qualifier
1675    pub fn alias_qualified(
1676        self,
1677        relation: Option<impl Into<TableReference>>,
1678        name: impl Into<String>,
1679    ) -> Expr {
1680        Expr::Alias(Alias::new(self, relation, name.into()))
1681    }
1682
1683    /// Return `self AS name` alias expression with a specific qualifier and metadata
1684    ///
1685    /// The metadata will be attached to the Arrow Schema field when the expression
1686    /// is converted to a field via `Expr.to_field()`.
1687    ///
1688    /// # Example
1689    /// ```
1690    /// # use datafusion_expr::col;
1691    /// # use std::collections::HashMap;
1692    /// # use datafusion_common::metadata::FieldMetadata;
1693    /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
1694    /// let metadata = FieldMetadata::from(metadata);
1695    /// let expr =
1696    ///     col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
1697    /// ```
1698    pub fn alias_qualified_with_metadata(
1699        self,
1700        relation: Option<impl Into<TableReference>>,
1701        name: impl Into<String>,
1702        metadata: Option<FieldMetadata>,
1703    ) -> Expr {
1704        Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
1705    }
1706
1707    /// Remove an alias from an expression if one exists.
1708    ///
1709    /// If the expression is not an alias, the expression is returned unchanged.
1710    /// This method does not remove aliases from nested expressions.
1711    ///
1712    /// # Example
1713    /// ```
1714    /// # use datafusion_expr::col;
1715    /// // `foo as "bar"` is unaliased to `foo`
1716    /// let expr = col("foo").alias("bar");
1717    /// assert_eq!(expr.unalias(), col("foo"));
1718    ///
1719    /// // `foo as "bar" + baz` is not unaliased
1720    /// let expr = col("foo").alias("bar") + col("baz");
1721    /// assert_eq!(expr.clone().unalias(), expr);
1722    ///
1723    /// // `foo as "bar" as "baz" is unaliased to foo as "bar"
1724    /// let expr = col("foo").alias("bar").alias("baz");
1725    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1726    /// ```
1727    pub fn unalias(self) -> Expr {
1728        match self {
1729            Expr::Alias(alias) => *alias.expr,
1730            _ => self,
1731        }
1732    }
1733
1734    /// Recursively removed potentially multiple aliases from an expression.
1735    ///
1736    /// This method removes nested aliases and returns [`Transformed`]
1737    /// to signal if the expression was changed.
1738    ///
1739    /// # Example
1740    /// ```
1741    /// # use datafusion_expr::col;
1742    /// // `foo as "bar"` is unaliased to `foo`
1743    /// let expr = col("foo").alias("bar");
1744    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1745    ///
1746    /// // `foo as "bar" + baz` is  unaliased
1747    /// let expr = col("foo").alias("bar") + col("baz");
1748    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1749    ///
1750    /// // `foo as "bar" as "baz" is unalaised to foo
1751    /// let expr = col("foo").alias("bar").alias("baz");
1752    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1753    /// ```
1754    pub fn unalias_nested(self) -> Transformed<Expr> {
1755        self.transform_down_up(
1756            |expr| {
1757                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1758                let recursion = if matches!(
1759                    expr,
1760                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1761                ) {
1762                    // Subqueries could contain aliases so don't recurse into those
1763                    TreeNodeRecursion::Jump
1764                } else {
1765                    TreeNodeRecursion::Continue
1766                };
1767                Ok(Transformed::new(expr, false, recursion))
1768            },
1769            |expr| {
1770                // f_up: unalias on up so we can remove nested aliases like
1771                // `(x as foo) as bar`
1772                if let Expr::Alias(alias) = expr {
1773                    match alias
1774                        .metadata
1775                        .as_ref()
1776                        .map(|h| h.is_empty())
1777                        .unwrap_or(true)
1778                    {
1779                        true => Ok(Transformed::yes(*alias.expr)),
1780                        false => Ok(Transformed::no(Expr::Alias(alias))),
1781                    }
1782                } else {
1783                    Ok(Transformed::no(expr))
1784                }
1785            },
1786        )
1787        // Unreachable code: internal closure doesn't return err
1788        .unwrap()
1789    }
1790
1791    /// Return `self IN <list>` if `negated` is false, otherwise
1792    /// return `self NOT IN <list>`.a
1793    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1794        Expr::InList(InList::new(Box::new(self), list, negated))
1795    }
1796
1797    /// Return `IsNull(Box(self))
1798    pub fn is_null(self) -> Expr {
1799        Expr::IsNull(Box::new(self))
1800    }
1801
1802    /// Return `IsNotNull(Box(self))
1803    pub fn is_not_null(self) -> Expr {
1804        Expr::IsNotNull(Box::new(self))
1805    }
1806
    /// Create a sort configuration from an existing expression.
    ///
    /// The expression and both flags are forwarded unchanged to [`Sort::new`].
    /// In the example below, `asc = true` and `nulls_first = true` stand for
    /// ascending order with nulls first.
    ///
    /// ```
    /// # use datafusion_expr::col;
    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
    /// ```
    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
        Sort::new(self, asc, nulls_first)
    }
1816
1817    /// Return `IsTrue(Box(self))`
1818    pub fn is_true(self) -> Expr {
1819        Expr::IsTrue(Box::new(self))
1820    }
1821
1822    /// Return `IsNotTrue(Box(self))`
1823    pub fn is_not_true(self) -> Expr {
1824        Expr::IsNotTrue(Box::new(self))
1825    }
1826
1827    /// Return `IsFalse(Box(self))`
1828    pub fn is_false(self) -> Expr {
1829        Expr::IsFalse(Box::new(self))
1830    }
1831
1832    /// Return `IsNotFalse(Box(self))`
1833    pub fn is_not_false(self) -> Expr {
1834        Expr::IsNotFalse(Box::new(self))
1835    }
1836
1837    /// Return `IsUnknown(Box(self))`
1838    pub fn is_unknown(self) -> Expr {
1839        Expr::IsUnknown(Box::new(self))
1840    }
1841
1842    /// Return `IsNotUnknown(Box(self))`
1843    pub fn is_not_unknown(self) -> Expr {
1844        Expr::IsNotUnknown(Box::new(self))
1845    }
1846
1847    /// return `self BETWEEN low AND high`
1848    pub fn between(self, low: Expr, high: Expr) -> Expr {
1849        Expr::Between(Between::new(
1850            Box::new(self),
1851            false,
1852            Box::new(low),
1853            Box::new(high),
1854        ))
1855    }
1856
1857    /// Return `self NOT BETWEEN low AND high`
1858    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1859        Expr::Between(Between::new(
1860            Box::new(self),
1861            true,
1862            Box::new(low),
1863            Box::new(high),
1864        ))
1865    }
1866    /// Return a reference to the inner `Column` if any
1867    ///
1868    /// returns `None` if the expression is not a `Column`
1869    ///
1870    /// Note: None may be returned for expressions that are not `Column` but
1871    /// are convertible to `Column` such as `Cast` expressions.
1872    ///
1873    /// Example
1874    /// ```
1875    /// # use datafusion_common::Column;
1876    /// use datafusion_expr::{col, Expr};
1877    /// let expr = col("foo");
1878    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1879    ///
1880    /// let expr = col("foo").alias("bar");
1881    /// assert_eq!(expr.try_as_col(), None);
1882    /// ```
1883    pub fn try_as_col(&self) -> Option<&Column> {
1884        if let Expr::Column(it) = self {
1885            Some(it)
1886        } else {
1887            None
1888        }
1889    }
1890
1891    /// Returns the inner `Column` if any. This is a specialized version of
1892    /// [`Self::try_as_col`] that take Cast expressions into account when the
1893    /// expression is as on condition for joins.
1894    ///
1895    /// Called this method when you are sure that the expression is a `Column`
1896    /// or a `Cast` expression that wraps a `Column`.
1897    pub fn get_as_join_column(&self) -> Option<&Column> {
1898        match self {
1899            Expr::Column(c) => Some(c),
1900            Expr::Cast(Cast { expr, .. }) => match &**expr {
1901                Expr::Column(c) => Some(c),
1902                _ => None,
1903            },
1904            _ => None,
1905        }
1906    }
1907
1908    /// Return all references to columns in this expression.
1909    ///
1910    /// # Example
1911    /// ```
1912    /// # use std::collections::HashSet;
1913    /// # use datafusion_common::Column;
1914    /// # use datafusion_expr::col;
1915    /// // For an expression `a + (b * a)`
1916    /// let expr = col("a") + (col("b") * col("a"));
1917    /// let refs = expr.column_refs();
1918    /// // refs contains "a" and "b"
1919    /// assert_eq!(refs.len(), 2);
1920    /// assert!(refs.contains(&Column::new_unqualified("a")));
1921    /// assert!(refs.contains(&Column::new_unqualified("b")));
1922    /// ```
1923    pub fn column_refs(&self) -> HashSet<&Column> {
1924        let mut using_columns = HashSet::new();
1925        self.add_column_refs(&mut using_columns);
1926        using_columns
1927    }
1928
1929    /// Adds references to all columns in this expression to the set
1930    ///
1931    /// See [`Self::column_refs`] for details
1932    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1933        self.apply_with_lambdas_params(|expr, lambdas_params| {
1934            if let Expr::Column(col) = expr {
1935                if col.relation.is_some() || !lambdas_params.contains(col.name()) {
1936                    set.insert(col);
1937                }
1938            }
1939            Ok(TreeNodeRecursion::Continue)
1940        })
1941        .expect("traversal is infallible");
1942    }
1943
1944    /// Return all references to columns and their occurrence counts in the expression.
1945    ///
1946    /// # Example
1947    /// ```
1948    /// # use std::collections::HashMap;
1949    /// # use datafusion_common::Column;
1950    /// # use datafusion_expr::col;
1951    /// // For an expression `a + (b * a)`
1952    /// let expr = col("a") + (col("b") * col("a"));
1953    /// let mut refs = expr.column_refs_counts();
1954    /// // refs contains "a" and "b"
1955    /// assert_eq!(refs.len(), 2);
1956    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1957    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1958    /// ```
1959    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1960        let mut map = HashMap::new();
1961        self.add_column_ref_counts(&mut map);
1962        map
1963    }
1964
1965    /// Adds references to all columns and their occurrence counts in the expression to
1966    /// the map.
1967    ///
1968    /// See [`Self::column_refs_counts`] for details
1969    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1970        self.apply_with_lambdas_params(|expr, lambdas_params| {
1971            if let Expr::Column(col) = expr {
1972                if !col.is_lambda_parameter(lambdas_params) {
1973                    *map.entry(col).or_default() += 1;
1974                }
1975            }
1976            Ok(TreeNodeRecursion::Continue)
1977        })
1978        .expect("traversal is infallible");
1979    }
1980
1981    /// Returns true if there are any column references in this Expr
1982    pub fn any_column_refs(&self) -> bool {
1983        self.exists_with_lambdas_params(|expr, lambdas_params| {
1984            Ok(matches!(expr, Expr::Column(c) if !c.is_lambda_parameter(lambdas_params)))
1985        })
1986        .expect("exists closure is infallible")
1987    }
1988
1989    /// Return true if the expression contains out reference(correlated) expressions.
1990    pub fn contains_outer(&self) -> bool {
1991        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1992            .expect("exists closure is infallible")
1993    }
1994
1995    /// Returns true if the expression node is volatile, i.e. whether it can return
1996    /// different results when evaluated multiple times with the same input.
1997    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1998    /// - `rand()` returns `true`,
1999    /// - `a + rand()` returns `false`
2000    pub fn is_volatile_node(&self) -> bool {
2001        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
2002    }
2003
2004    /// Returns true if the expression is volatile, i.e. whether it can return different
2005    /// results when evaluated multiple times with the same input.
2006    ///
2007    /// For example the function call `RANDOM()` is volatile as each call will
2008    /// return a different value.
2009    ///
2010    /// See [`Volatility`] for more information.
2011    pub fn is_volatile(&self) -> bool {
2012        self.exists(|expr| Ok(expr.is_volatile_node()))
2013            .expect("exists closure is infallible")
2014    }
2015
2016    /// Recursively find all [`Expr::Placeholder`] expressions, and
2017    /// to infer their [`DataType`] from the context of their use.
2018    ///
2019    /// For example, given an expression like `<int32> = $0` will infer `$0` to
2020    /// have type `int32`.
2021    ///
2022    /// Returns transformed expression and flag that is true if expression contains
2023    /// at least one placeholder.
2024    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
2025        let mut has_placeholder = false;
2026        self.transform_with_schema(schema, |mut expr, schema| {
2027            match &mut expr {
2028                // Default to assuming the arguments are the same type
2029                Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => {
2030                    rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
2031                    rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
2032                }
2033                Expr::Between(Between {
2034                    expr,
2035                    negated: _,
2036                    low,
2037                    high,
2038                }) => {
2039                    rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
2040                    rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
2041                }
2042                Expr::InList(InList {
2043                    expr,
2044                    list,
2045                    negated: _,
2046                }) => {
2047                    for item in list.iter_mut() {
2048                        rewrite_placeholder(item, expr.as_ref(), schema)?;
2049                    }
2050                }
2051                Expr::Like(Like { expr, pattern, .. })
2052                | Expr::SimilarTo(Like { expr, pattern, .. }) => {
2053                    rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?;
2054                }
2055                Expr::Placeholder(_) => {
2056                    has_placeholder = true;
2057                }
2058                _ => {}
2059            }
2060            Ok(Transformed::yes(expr))
2061        })
2062        .data()
2063        .map(|data| (data, has_placeholder))
2064    }
2065
2066    /// Returns true if some of this `exprs` subexpressions may not be evaluated
2067    /// and thus any side effects (like divide by zero) may not be encountered
2068    pub fn short_circuits(&self) -> bool {
2069        match self {
2070            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
2071            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
2072                matches!(op, Operator::And | Operator::Or)
2073            }
2074            Expr::Case { .. } => true,
2075            // Use explicit pattern match instead of a default
2076            // implementation, so that in the future if someone adds
2077            // new Expr types, they will check here as well
2078            // TODO: remove the next line after `Expr::Wildcard` is removed
2079            #[expect(deprecated)]
2080            Expr::AggregateFunction(..)
2081            | Expr::Alias(..)
2082            | Expr::Between(..)
2083            | Expr::Cast(..)
2084            | Expr::Column(..)
2085            | Expr::Exists(..)
2086            | Expr::GroupingSet(..)
2087            | Expr::InList(..)
2088            | Expr::InSubquery(..)
2089            | Expr::IsFalse(..)
2090            | Expr::IsNotFalse(..)
2091            | Expr::IsNotNull(..)
2092            | Expr::IsNotTrue(..)
2093            | Expr::IsNotUnknown(..)
2094            | Expr::IsNull(..)
2095            | Expr::IsTrue(..)
2096            | Expr::IsUnknown(..)
2097            | Expr::Like(..)
2098            | Expr::ScalarSubquery(..)
2099            | Expr::ScalarVariable(_, _)
2100            | Expr::SimilarTo(..)
2101            | Expr::Not(..)
2102            | Expr::Negative(..)
2103            | Expr::OuterReferenceColumn(_, _)
2104            | Expr::TryCast(..)
2105            | Expr::Unnest(..)
2106            | Expr::Wildcard { .. }
2107            | Expr::WindowFunction(..)
2108            | Expr::Literal(..)
2109            | Expr::Placeholder(..)
2110            | Expr::Lambda { .. } => false,
2111        }
2112    }
2113
2114    /// Returns a reference to the set of locations in the SQL query where this
2115    /// expression appears, if known. [`None`] is returned if the expression
2116    /// type doesn't support tracking locations yet.
2117    pub fn spans(&self) -> Option<&Spans> {
2118        match self {
2119            Expr::Column(col) => Some(&col.spans),
2120            _ => None,
2121        }
2122    }
2123
2124    /// Check if the Expr is literal and get the literal value if it is.
2125    pub fn as_literal(&self) -> Option<&ScalarValue> {
2126        if let Expr::Literal(lit, _) = self {
2127            Some(lit)
2128        } else {
2129            None
2130        }
2131    }
2132}
2133
2134impl Normalizeable for Expr {
2135    fn can_normalize(&self) -> bool {
2136        #[allow(clippy::match_like_matches_macro)]
2137        match self {
2138            Expr::BinaryExpr(BinaryExpr {
2139                op:
2140                    _op @ (Operator::Plus
2141                    | Operator::Multiply
2142                    | Operator::BitwiseAnd
2143                    | Operator::BitwiseOr
2144                    | Operator::BitwiseXor
2145                    | Operator::Eq
2146                    | Operator::NotEq),
2147                ..
2148            }) => true,
2149            _ => false,
2150        }
2151    }
2152}
2153
2154impl NormalizeEq for Expr {
2155    fn normalize_eq(&self, other: &Self) -> bool {
2156        match (self, other) {
2157            (
2158                Expr::BinaryExpr(BinaryExpr {
2159                    left: self_left,
2160                    op: self_op,
2161                    right: self_right,
2162                }),
2163                Expr::BinaryExpr(BinaryExpr {
2164                    left: other_left,
2165                    op: other_op,
2166                    right: other_right,
2167                }),
2168            ) => {
2169                if self_op != other_op {
2170                    return false;
2171                }
2172
2173                if matches!(
2174                    self_op,
2175                    Operator::Plus
2176                        | Operator::Multiply
2177                        | Operator::BitwiseAnd
2178                        | Operator::BitwiseOr
2179                        | Operator::BitwiseXor
2180                        | Operator::Eq
2181                        | Operator::NotEq
2182                ) {
2183                    (self_left.normalize_eq(other_left)
2184                        && self_right.normalize_eq(other_right))
2185                        || (self_left.normalize_eq(other_right)
2186                            && self_right.normalize_eq(other_left))
2187                } else {
2188                    self_left.normalize_eq(other_left)
2189                        && self_right.normalize_eq(other_right)
2190                }
2191            }
2192            (
2193                Expr::Alias(Alias {
2194                    expr: self_expr,
2195                    relation: self_relation,
2196                    name: self_name,
2197                    ..
2198                }),
2199                Expr::Alias(Alias {
2200                    expr: other_expr,
2201                    relation: other_relation,
2202                    name: other_name,
2203                    ..
2204                }),
2205            ) => {
2206                self_name == other_name
2207                    && self_relation == other_relation
2208                    && self_expr.normalize_eq(other_expr)
2209            }
2210            (
2211                Expr::Like(Like {
2212                    negated: self_negated,
2213                    expr: self_expr,
2214                    pattern: self_pattern,
2215                    escape_char: self_escape_char,
2216                    case_insensitive: self_case_insensitive,
2217                }),
2218                Expr::Like(Like {
2219                    negated: other_negated,
2220                    expr: other_expr,
2221                    pattern: other_pattern,
2222                    escape_char: other_escape_char,
2223                    case_insensitive: other_case_insensitive,
2224                }),
2225            )
2226            | (
2227                Expr::SimilarTo(Like {
2228                    negated: self_negated,
2229                    expr: self_expr,
2230                    pattern: self_pattern,
2231                    escape_char: self_escape_char,
2232                    case_insensitive: self_case_insensitive,
2233                }),
2234                Expr::SimilarTo(Like {
2235                    negated: other_negated,
2236                    expr: other_expr,
2237                    pattern: other_pattern,
2238                    escape_char: other_escape_char,
2239                    case_insensitive: other_case_insensitive,
2240                }),
2241            ) => {
2242                self_negated == other_negated
2243                    && self_escape_char == other_escape_char
2244                    && self_case_insensitive == other_case_insensitive
2245                    && self_expr.normalize_eq(other_expr)
2246                    && self_pattern.normalize_eq(other_pattern)
2247            }
2248            (Expr::Not(self_expr), Expr::Not(other_expr))
2249            | (Expr::IsNull(self_expr), Expr::IsNull(other_expr))
2250            | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr))
2251            | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr))
2252            | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr))
2253            | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr))
2254            | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr))
2255            | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr))
2256            | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr))
2257            | (Expr::Negative(self_expr), Expr::Negative(other_expr))
2258            | (
2259                Expr::Unnest(Unnest { expr: self_expr }),
2260                Expr::Unnest(Unnest { expr: other_expr }),
2261            ) => self_expr.normalize_eq(other_expr),
2262            (
2263                Expr::Between(Between {
2264                    expr: self_expr,
2265                    negated: self_negated,
2266                    low: self_low,
2267                    high: self_high,
2268                }),
2269                Expr::Between(Between {
2270                    expr: other_expr,
2271                    negated: other_negated,
2272                    low: other_low,
2273                    high: other_high,
2274                }),
2275            ) => {
2276                self_negated == other_negated
2277                    && self_expr.normalize_eq(other_expr)
2278                    && self_low.normalize_eq(other_low)
2279                    && self_high.normalize_eq(other_high)
2280            }
2281            (
2282                Expr::Cast(Cast {
2283                    expr: self_expr,
2284                    data_type: self_data_type,
2285                }),
2286                Expr::Cast(Cast {
2287                    expr: other_expr,
2288                    data_type: other_data_type,
2289                }),
2290            )
2291            | (
2292                Expr::TryCast(TryCast {
2293                    expr: self_expr,
2294                    data_type: self_data_type,
2295                }),
2296                Expr::TryCast(TryCast {
2297                    expr: other_expr,
2298                    data_type: other_data_type,
2299                }),
2300            ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr),
2301            (
2302                Expr::ScalarFunction(ScalarFunction {
2303                    func: self_func,
2304                    args: self_args,
2305                }),
2306                Expr::ScalarFunction(ScalarFunction {
2307                    func: other_func,
2308                    args: other_args,
2309                }),
2310            ) => {
2311                self_func.name() == other_func.name()
2312                    && self_args.len() == other_args.len()
2313                    && self_args
2314                        .iter()
2315                        .zip(other_args.iter())
2316                        .all(|(a, b)| a.normalize_eq(b))
2317            }
2318            (
2319                Expr::AggregateFunction(AggregateFunction {
2320                    func: self_func,
2321                    params:
2322                        AggregateFunctionParams {
2323                            args: self_args,
2324                            distinct: self_distinct,
2325                            filter: self_filter,
2326                            order_by: self_order_by,
2327                            null_treatment: self_null_treatment,
2328                        },
2329                }),
2330                Expr::AggregateFunction(AggregateFunction {
2331                    func: other_func,
2332                    params:
2333                        AggregateFunctionParams {
2334                            args: other_args,
2335                            distinct: other_distinct,
2336                            filter: other_filter,
2337                            order_by: other_order_by,
2338                            null_treatment: other_null_treatment,
2339                        },
2340                }),
2341            ) => {
2342                self_func.name() == other_func.name()
2343                    && self_distinct == other_distinct
2344                    && self_null_treatment == other_null_treatment
2345                    && self_args.len() == other_args.len()
2346                    && self_args
2347                        .iter()
2348                        .zip(other_args.iter())
2349                        .all(|(a, b)| a.normalize_eq(b))
2350                    && match (self_filter, other_filter) {
2351                        (Some(self_filter), Some(other_filter)) => {
2352                            self_filter.normalize_eq(other_filter)
2353                        }
2354                        (None, None) => true,
2355                        _ => false,
2356                    }
2357                    && self_order_by
2358                        .iter()
2359                        .zip(other_order_by.iter())
2360                        .all(|(a, b)| {
2361                            a.asc == b.asc
2362                                && a.nulls_first == b.nulls_first
2363                                && a.expr.normalize_eq(&b.expr)
2364                        })
2365                    && self_order_by.len() == other_order_by.len()
2366            }
2367            (Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
2368                let WindowFunction {
2369                    fun: self_fun,
2370                    params:
2371                        WindowFunctionParams {
2372                            args: self_args,
2373                            window_frame: self_window_frame,
2374                            partition_by: self_partition_by,
2375                            order_by: self_order_by,
2376                            filter: self_filter,
2377                            null_treatment: self_null_treatment,
2378                            distinct: self_distinct,
2379                        },
2380                } = left.as_ref();
2381                let WindowFunction {
2382                    fun: other_fun,
2383                    params:
2384                        WindowFunctionParams {
2385                            args: other_args,
2386                            window_frame: other_window_frame,
2387                            partition_by: other_partition_by,
2388                            order_by: other_order_by,
2389                            filter: other_filter,
2390                            null_treatment: other_null_treatment,
2391                            distinct: other_distinct,
2392                        },
2393                } = other.as_ref();
2394
2395                self_fun.name() == other_fun.name()
2396                    && self_window_frame == other_window_frame
2397                    && match (self_filter, other_filter) {
2398                        (Some(a), Some(b)) => a.normalize_eq(b),
2399                        (None, None) => true,
2400                        _ => false,
2401                    }
2402                    && self_null_treatment == other_null_treatment
2403                    && self_args.len() == other_args.len()
2404                    && self_args
2405                        .iter()
2406                        .zip(other_args.iter())
2407                        .all(|(a, b)| a.normalize_eq(b))
2408                    && self_partition_by
2409                        .iter()
2410                        .zip(other_partition_by.iter())
2411                        .all(|(a, b)| a.normalize_eq(b))
2412                    && self_order_by
2413                        .iter()
2414                        .zip(other_order_by.iter())
2415                        .all(|(a, b)| {
2416                            a.asc == b.asc
2417                                && a.nulls_first == b.nulls_first
2418                                && a.expr.normalize_eq(&b.expr)
2419                        })
2420                    && self_distinct == other_distinct
2421            }
2422            (
2423                Expr::Exists(Exists {
2424                    subquery: self_subquery,
2425                    negated: self_negated,
2426                }),
2427                Expr::Exists(Exists {
2428                    subquery: other_subquery,
2429                    negated: other_negated,
2430                }),
2431            ) => {
2432                self_negated == other_negated
2433                    && self_subquery.normalize_eq(other_subquery)
2434            }
2435            (
2436                Expr::InSubquery(InSubquery {
2437                    expr: self_expr,
2438                    subquery: self_subquery,
2439                    negated: self_negated,
2440                }),
2441                Expr::InSubquery(InSubquery {
2442                    expr: other_expr,
2443                    subquery: other_subquery,
2444                    negated: other_negated,
2445                }),
2446            ) => {
2447                self_negated == other_negated
2448                    && self_expr.normalize_eq(other_expr)
2449                    && self_subquery.normalize_eq(other_subquery)
2450            }
2451            (
2452                Expr::ScalarSubquery(self_subquery),
2453                Expr::ScalarSubquery(other_subquery),
2454            ) => self_subquery.normalize_eq(other_subquery),
2455            (
2456                Expr::GroupingSet(GroupingSet::Rollup(self_exprs)),
2457                Expr::GroupingSet(GroupingSet::Rollup(other_exprs)),
2458            )
2459            | (
2460                Expr::GroupingSet(GroupingSet::Cube(self_exprs)),
2461                Expr::GroupingSet(GroupingSet::Cube(other_exprs)),
2462            ) => {
2463                self_exprs.len() == other_exprs.len()
2464                    && self_exprs
2465                        .iter()
2466                        .zip(other_exprs.iter())
2467                        .all(|(a, b)| a.normalize_eq(b))
2468            }
2469            (
2470                Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)),
2471                Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)),
2472            ) => {
2473                self_exprs.len() == other_exprs.len()
2474                    && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| {
2475                        a.len() == b.len()
2476                            && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y))
2477                    })
2478            }
2479            (
2480                Expr::InList(InList {
2481                    expr: self_expr,
2482                    list: self_list,
2483                    negated: self_negated,
2484                }),
2485                Expr::InList(InList {
2486                    expr: other_expr,
2487                    list: other_list,
2488                    negated: other_negated,
2489                }),
2490            ) => {
2491                // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)`
2492                self_negated == other_negated
2493                    && self_expr.normalize_eq(other_expr)
2494                    && self_list.len() == other_list.len()
2495                    && self_list
2496                        .iter()
2497                        .zip(other_list.iter())
2498                        .all(|(a, b)| a.normalize_eq(b))
2499            }
2500            (
2501                Expr::Case(Case {
2502                    expr: self_expr,
2503                    when_then_expr: self_when_then_expr,
2504                    else_expr: self_else_expr,
2505                }),
2506                Expr::Case(Case {
2507                    expr: other_expr,
2508                    when_then_expr: other_when_then_expr,
2509                    else_expr: other_else_expr,
2510                }),
2511            ) => {
2512                // TODO: normalize_eq for when_then_expr
2513                // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END`
2514                self_when_then_expr.len() == other_when_then_expr.len()
2515                    && self_when_then_expr
2516                        .iter()
2517                        .zip(other_when_then_expr.iter())
2518                        .all(|((self_when, self_then), (other_when, other_then))| {
2519                            self_when.normalize_eq(other_when)
2520                                && self_then.normalize_eq(other_then)
2521                        })
2522                    && match (self_expr, other_expr) {
2523                        (Some(self_expr), Some(other_expr)) => {
2524                            self_expr.normalize_eq(other_expr)
2525                        }
2526                        (None, None) => true,
2527                        (_, _) => false,
2528                    }
2529                    && match (self_else_expr, other_else_expr) {
2530                        (Some(self_else_expr), Some(other_else_expr)) => {
2531                            self_else_expr.normalize_eq(other_else_expr)
2532                        }
2533                        (None, None) => true,
2534                        (_, _) => false,
2535                    }
2536            }
2537            (_, _) => self == other,
2538        }
2539    }
2540}
2541
impl HashNode for Expr {
    /// Feeds the variant discriminant of this expression, plus the fields each
    /// arm below explicitly selects, into `state`.
    ///
    /// Fields bound to `_`-prefixed names (or to `_`) are destructured but not
    /// hashed here.
    /// NOTE(review): presumably those child expressions are hashed separately by
    /// the caller of `hash_node` -- confirm against the `HashNode` trait docs.
    ///
    /// As it is pretty easy to forget changing this method when `Expr` changes, the
    /// implementation avoids wildcard patterns (`..`, `_`) where it can, so that
    /// such changes are caught at compile time.
    fn hash_node<H: Hasher>(&self, state: &mut H) {
        // Distinguish variants first so that e.g. `IsNull` and `IsNotNull`, which
        // hash no further fields, still produce different input to the hasher.
        mem::discriminant(self).hash(state);
        match self {
            // Any remaining `Alias` fields are skipped via `..` and do not
            // contribute to the hash.
            Expr::Alias(Alias {
                expr: _expr,
                relation,
                name,
                ..
            }) => {
                relation.hash(state);
                name.hash(state);
            }
            Expr::Column(column) => {
                column.hash(state);
            }
            Expr::ScalarVariable(data_type, name) => {
                data_type.hash(state);
                name.hash(state);
            }
            // The second tuple field of `Literal` is ignored for hashing.
            Expr::Literal(scalar_value, _) => {
                scalar_value.hash(state);
            }
            Expr::BinaryExpr(BinaryExpr {
                left: _left,
                op,
                right: _right,
            }) => {
                op.hash(state);
            }
            Expr::Like(Like {
                negated,
                expr: _expr,
                pattern: _pattern,
                escape_char,
                case_insensitive,
            })
            | Expr::SimilarTo(Like {
                negated,
                expr: _expr,
                pattern: _pattern,
                escape_char,
                case_insensitive,
            }) => {
                negated.hash(state);
                escape_char.hash(state);
                case_insensitive.hash(state);
            }
            // Single-operand variants: only the discriminant hashed above
            // distinguishes them.
            Expr::Not(_expr)
            | Expr::IsNotNull(_expr)
            | Expr::IsNull(_expr)
            | Expr::IsTrue(_expr)
            | Expr::IsFalse(_expr)
            | Expr::IsUnknown(_expr)
            | Expr::IsNotTrue(_expr)
            | Expr::IsNotFalse(_expr)
            | Expr::IsNotUnknown(_expr)
            | Expr::Negative(_expr) => {}
            Expr::Between(Between {
                expr: _expr,
                negated,
                low: _low,
                high: _high,
            }) => {
                negated.hash(state);
            }
            // `Case` contributes nothing beyond its discriminant.
            Expr::Case(Case {
                expr: _expr,
                when_then_expr: _when_then_expr,
                else_expr: _else_expr,
            }) => {}
            Expr::Cast(Cast {
                expr: _expr,
                data_type,
            })
            | Expr::TryCast(TryCast {
                expr: _expr,
                data_type,
            }) => {
                data_type.hash(state);
            }
            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
                func.hash(state);
            }
            // `filter` and `order_by` are not hashed for aggregate functions.
            Expr::AggregateFunction(AggregateFunction {
                func,
                params:
                    AggregateFunctionParams {
                        args: _args,
                        distinct,
                        filter: _,
                        order_by: _,
                        null_treatment,
                    },
            }) => {
                func.hash(state);
                distinct.hash(state);
                null_treatment.hash(state);
            }
            // Unlike the aggregate arm above, the window `filter` IS fed into the
            // hash here, together with the frame, null treatment and DISTINCT flag.
            Expr::WindowFunction(window_fun) => {
                let WindowFunction {
                    fun,
                    params:
                        WindowFunctionParams {
                            args: _args,
                            partition_by: _,
                            order_by: _,
                            window_frame,
                            filter,
                            null_treatment,
                            distinct,
                        },
                } = window_fun.as_ref();
                fun.hash(state);
                window_frame.hash(state);
                filter.hash(state);
                null_treatment.hash(state);
                distinct.hash(state);
            }
            Expr::InList(InList {
                expr: _expr,
                list: _list,
                negated,
            }) => {
                negated.hash(state);
            }
            // Subquery-carrying variants hash the subquery value itself.
            Expr::Exists(Exists { subquery, negated }) => {
                subquery.hash(state);
                negated.hash(state);
            }
            Expr::InSubquery(InSubquery {
                expr: _expr,
                subquery,
                negated,
            }) => {
                subquery.hash(state);
                negated.hash(state);
            }
            Expr::ScalarSubquery(subquery) => {
                subquery.hash(state);
            }
            #[expect(deprecated)]
            Expr::Wildcard { qualifier, options } => {
                qualifier.hash(state);
                options.hash(state);
            }
            // Only the kind of grouping set (rollup / cube / grouping sets) is
            // hashed; the contained expression lists are not.
            Expr::GroupingSet(grouping_set) => {
                mem::discriminant(grouping_set).hash(state);
                match grouping_set {
                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
                    GroupingSet::GroupingSets(_exprs) => {}
                }
            }
            Expr::Placeholder(place_holder) => {
                place_holder.hash(state);
            }
            Expr::OuterReferenceColumn(field, column) => {
                field.hash(state);
                column.hash(state);
            }
            Expr::Unnest(Unnest { expr: _expr }) => {}
            // The lambda parameter list is hashed; the body is not.
            Expr::Lambda(Lambda {
                params,
                body: _,
            }) => {
                params.hash(state);
            }
        };
    }
}
2715
2716// Modifies expr to match the DataType, metadata, and nullability of other if it is
2717// a placeholder with previously unspecified type information (i.e., most placeholders)
2718fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
2719    if let Expr::Placeholder(Placeholder { id: _, field }) = expr {
2720        if field.is_none() {
2721            let other_field = other.to_field(schema);
2722            match other_field {
2723                Err(e) => {
2724                    Err(e.context(format!(
2725                        "Can not find type of {other} needed to infer type of {expr}"
2726                    )))?;
2727                }
2728                Ok((_, other_field)) => {
2729                    // We can't infer the nullability of the future parameter that might
2730                    // be bound, so ensure this is set to true
2731                    *field =
2732                        Some(other_field.as_ref().clone().with_nullable(true).into());
2733                }
2734            }
2735        };
2736    }
2737    Ok(())
2738}
2739
/// Renders every element of an iterable collection with its `Display`
/// implementation and joins the pieces with `", "`.
///
/// An empty collection yields an empty `String`.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| format!("{item}")).collect();
        rendered.join(", ")
    }};
}
2750
2751struct SchemaDisplay<'a>(&'a Expr);
2752impl Display for SchemaDisplay<'_> {
2753    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2754        match self.0 {
2755            // The same as Display
2756            // TODO: remove the next line after `Expr::Wildcard` is removed
2757            #[expect(deprecated)]
2758            Expr::Column(_)
2759            | Expr::Literal(_, _)
2760            | Expr::ScalarVariable(..)
2761            | Expr::OuterReferenceColumn(..)
2762            | Expr::Placeholder(_)
2763            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
2764            Expr::AggregateFunction(AggregateFunction { func, params }) => {
2765                match func.schema_name(params) {
2766                    Ok(name) => {
2767                        write!(f, "{name}")
2768                    }
2769                    Err(e) => {
2770                        write!(f, "got error from schema_name {e}")
2771                    }
2772                }
2773            }
2774            // Expr is not shown since it is aliased
2775            Expr::Alias(Alias {
2776                name,
2777                relation: Some(relation),
2778                ..
2779            }) => write!(f, "{relation}.{name}"),
2780            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2781            Expr::Between(Between {
2782                expr,
2783                negated,
2784                low,
2785                high,
2786            }) => {
2787                if *negated {
2788                    write!(
2789                        f,
2790                        "{} NOT BETWEEN {} AND {}",
2791                        SchemaDisplay(expr),
2792                        SchemaDisplay(low),
2793                        SchemaDisplay(high),
2794                    )
2795                } else {
2796                    write!(
2797                        f,
2798                        "{} BETWEEN {} AND {}",
2799                        SchemaDisplay(expr),
2800                        SchemaDisplay(low),
2801                        SchemaDisplay(high),
2802                    )
2803                }
2804            }
2805            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2806                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
2807            }
2808            Expr::Case(Case {
2809                expr,
2810                when_then_expr,
2811                else_expr,
2812            }) => {
2813                write!(f, "CASE ")?;
2814
2815                if let Some(e) = expr {
2816                    write!(f, "{} ", SchemaDisplay(e))?;
2817                }
2818
2819                for (when, then) in when_then_expr {
2820                    write!(
2821                        f,
2822                        "WHEN {} THEN {} ",
2823                        SchemaDisplay(when),
2824                        SchemaDisplay(then),
2825                    )?;
2826                }
2827
2828                if let Some(e) = else_expr {
2829                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
2830                }
2831
2832                write!(f, "END")
2833            }
2834            // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
2835            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2836                write!(f, "{}", SchemaDisplay(expr))
2837            }
2838            Expr::InList(InList {
2839                expr,
2840                list,
2841                negated,
2842            }) => {
2843                let inlist_name = schema_name_from_exprs(list)?;
2844
2845                if *negated {
2846                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
2847                } else {
2848                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
2849                }
2850            }
2851            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
2852            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
2853            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2854                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2855            }
2856            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2857                write!(f, "GROUPING SETS (")?;
2858                for exprs in lists_of_exprs.iter() {
2859                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
2860                }
2861                write!(f, ")")
2862            }
2863            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2864                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
2865            }
2866            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
2867            Expr::IsNotNull(expr) => {
2868                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
2869            }
2870            Expr::IsUnknown(expr) => {
2871                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
2872            }
2873            Expr::IsNotUnknown(expr) => {
2874                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
2875            }
2876            Expr::InSubquery(InSubquery { negated: true, .. }) => {
2877                write!(f, "NOT IN")
2878            }
2879            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
2880            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
2881            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
2882            Expr::IsNotTrue(expr) => {
2883                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
2884            }
2885            Expr::IsNotFalse(expr) => {
2886                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2887            }
2888            Expr::Like(Like {
2889                negated,
2890                expr,
2891                pattern,
2892                escape_char,
2893                case_insensitive,
2894            }) => {
2895                write!(
2896                    f,
2897                    "{} {}{} {}",
2898                    SchemaDisplay(expr),
2899                    if *negated { "NOT " } else { "" },
2900                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2901                    SchemaDisplay(pattern),
2902                )?;
2903
2904                if let Some(char) = escape_char {
2905                    write!(f, " CHAR '{char}'")?;
2906                }
2907
2908                Ok(())
2909            }
2910            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2911            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2912            Expr::Unnest(Unnest { expr }) => {
2913                write!(f, "UNNEST({})", SchemaDisplay(expr))
2914            }
2915            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2916                match func.schema_name(args) {
2917                    Ok(name) => {
2918                        write!(f, "{name}")
2919                    }
2920                    Err(e) => {
2921                        write!(f, "got error from schema_name {e}")
2922                    }
2923                }
2924            }
2925            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2926                write!(f, "{}", subquery.schema().field(0).name())
2927            }
2928            Expr::SimilarTo(Like {
2929                negated,
2930                expr,
2931                pattern,
2932                escape_char,
2933                ..
2934            }) => {
2935                write!(
2936                    f,
2937                    "{} {} {}",
2938                    SchemaDisplay(expr),
2939                    if *negated {
2940                        "NOT SIMILAR TO"
2941                    } else {
2942                        "SIMILAR TO"
2943                    },
2944                    SchemaDisplay(pattern),
2945                )?;
2946                if let Some(char) = escape_char {
2947                    write!(f, " CHAR '{char}'")?;
2948                }
2949
2950                Ok(())
2951            }
2952            Expr::WindowFunction(window_fun) => {
2953                let WindowFunction { fun, params } = window_fun.as_ref();
2954                match fun {
2955                    WindowFunctionDefinition::AggregateUDF(fun) => {
2956                        match fun.window_function_schema_name(params) {
2957                            Ok(name) => {
2958                                write!(f, "{name}")
2959                            }
2960                            Err(e) => {
2961                                write!(
2962                                    f,
2963                                    "got error from window_function_schema_name {e}"
2964                                )
2965                            }
2966                        }
2967                    }
2968                    _ => {
2969                        let WindowFunctionParams {
2970                            args,
2971                            partition_by,
2972                            order_by,
2973                            window_frame,
2974                            filter,
2975                            null_treatment,
2976                            distinct,
2977                        } = params;
2978
2979                        // Write function name and open parenthesis
2980                        write!(f, "{fun}(")?;
2981
2982                        // If DISTINCT, emit the keyword
2983                        if *distinct {
2984                            write!(f, "DISTINCT ")?;
2985                        }
2986
2987                        // Write the comma‑separated argument list
2988                        write!(
2989                            f,
2990                            "{}",
2991                            schema_name_from_exprs_comma_separated_without_space(args)?
2992                        )?;
2993
2994                        // **Close the argument parenthesis**
2995                        write!(f, ")")?;
2996
2997                        if let Some(null_treatment) = null_treatment {
2998                            write!(f, " {null_treatment}")?;
2999                        }
3000
3001                        if let Some(filter) = filter {
3002                            write!(f, " FILTER (WHERE {filter})")?;
3003                        }
3004
3005                        if !partition_by.is_empty() {
3006                            write!(
3007                                f,
3008                                " PARTITION BY [{}]",
3009                                schema_name_from_exprs(partition_by)?
3010                            )?;
3011                        }
3012
3013                        if !order_by.is_empty() {
3014                            write!(
3015                                f,
3016                                " ORDER BY [{}]",
3017                                schema_name_from_sorts(order_by)?
3018                            )?;
3019                        };
3020
3021                        write!(f, " {window_frame}")
3022                    }
3023                }
3024            }
3025            Expr::Lambda(Lambda {
3026                params,
3027                body,
3028            }) => {
3029                write!(f, "({}) -> {body}", display_comma_separated(params))
3030            }
3031        }
3032    }
3033}
3034
3035/// A helper struct for displaying an `Expr` as an SQL-like string.
3036struct SqlDisplay<'a>(&'a Expr);
3037
3038impl Display for SqlDisplay<'_> {
3039    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
3040        match self.0 {
3041            Expr::Literal(scalar, _) => scalar.fmt(f),
3042            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
3043            Expr::Between(Between {
3044                expr,
3045                negated,
3046                low,
3047                high,
3048            }) => {
3049                if *negated {
3050                    write!(
3051                        f,
3052                        "{} NOT BETWEEN {} AND {}",
3053                        SqlDisplay(expr),
3054                        SqlDisplay(low),
3055                        SqlDisplay(high),
3056                    )
3057                } else {
3058                    write!(
3059                        f,
3060                        "{} BETWEEN {} AND {}",
3061                        SqlDisplay(expr),
3062                        SqlDisplay(low),
3063                        SqlDisplay(high),
3064                    )
3065                }
3066            }
3067            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
3068                write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
3069            }
3070            Expr::Case(Case {
3071                expr,
3072                when_then_expr,
3073                else_expr,
3074            }) => {
3075                write!(f, "CASE ")?;
3076
3077                if let Some(e) = expr {
3078                    write!(f, "{} ", SqlDisplay(e))?;
3079                }
3080
3081                for (when, then) in when_then_expr {
3082                    write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
3083                }
3084
3085                if let Some(e) = else_expr {
3086                    write!(f, "ELSE {} ", SqlDisplay(e))?;
3087                }
3088
3089                write!(f, "END")
3090            }
3091            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
3092                write!(f, "{}", SqlDisplay(expr))
3093            }
3094            Expr::InList(InList {
3095                expr,
3096                list,
3097                negated,
3098            }) => {
3099                write!(
3100                    f,
3101                    "{}{} IN {}",
3102                    SqlDisplay(expr),
3103                    if *negated { " NOT" } else { "" },
3104                    ExprListDisplay::comma_separated(list.as_slice())
3105                )
3106            }
3107            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
3108                write!(
3109                    f,
3110                    "ROLLUP ({})",
3111                    ExprListDisplay::comma_separated(exprs.as_slice())
3112                )
3113            }
3114            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
3115                write!(f, "GROUPING SETS (")?;
3116                for exprs in lists_of_exprs.iter() {
3117                    write!(
3118                        f,
3119                        "({})",
3120                        ExprListDisplay::comma_separated(exprs.as_slice())
3121                    )?;
3122                }
3123                write!(f, ")")
3124            }
3125            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
3126                write!(
3127                    f,
3128                    "ROLLUP ({})",
3129                    ExprListDisplay::comma_separated(exprs.as_slice())
3130                )
3131            }
3132            Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
3133            Expr::IsNotNull(expr) => {
3134                write!(f, "{} IS NOT NULL", SqlDisplay(expr))
3135            }
3136            Expr::IsUnknown(expr) => {
3137                write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
3138            }
3139            Expr::IsNotUnknown(expr) => {
3140                write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
3141            }
3142            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
3143            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
3144            Expr::IsNotTrue(expr) => {
3145                write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
3146            }
3147            Expr::IsNotFalse(expr) => {
3148                write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
3149            }
3150            Expr::Like(Like {
3151                negated,
3152                expr,
3153                pattern,
3154                escape_char,
3155                case_insensitive,
3156            }) => {
3157                write!(
3158                    f,
3159                    "{} {}{} {}",
3160                    SqlDisplay(expr),
3161                    if *negated { "NOT " } else { "" },
3162                    if *case_insensitive { "ILIKE" } else { "LIKE" },
3163                    SqlDisplay(pattern),
3164                )?;
3165
3166                if let Some(char) = escape_char {
3167                    write!(f, " CHAR '{char}'")?;
3168                }
3169
3170                Ok(())
3171            }
3172            Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
3173            Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
3174            Expr::Unnest(Unnest { expr }) => {
3175                write!(f, "UNNEST({})", SqlDisplay(expr))
3176            }
3177            Expr::SimilarTo(Like {
3178                negated,
3179                expr,
3180                pattern,
3181                escape_char,
3182                ..
3183            }) => {
3184                write!(
3185                    f,
3186                    "{} {} {}",
3187                    SqlDisplay(expr),
3188                    if *negated {
3189                        "NOT SIMILAR TO"
3190                    } else {
3191                        "SIMILAR TO"
3192                    },
3193                    SqlDisplay(pattern),
3194                )?;
3195                if let Some(char) = escape_char {
3196                    write!(f, " CHAR '{char}'")?;
3197                }
3198
3199                Ok(())
3200            }
3201            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3202                match func.human_display(params) {
3203                    Ok(name) => {
3204                        write!(f, "{name}")
3205                    }
3206                    Err(e) => {
3207                        write!(f, "got error from schema_name {e}")
3208                    }
3209                }
3210            }
3211            Expr::Lambda(Lambda {
3212                params,
3213                body,
3214            }) => {
3215                write!(f, "({}) -> {}", params.join(", "), SchemaDisplay(body))
3216            }
3217            _ => write!(f, "{}", self.0),
3218        }
3219    }
3220}
3221
3222/// Get schema_name for Vector of expressions
3223///
3224/// Internal usage. Please call `schema_name_from_exprs` instead
3225// TODO: Use ", " to standardize the formatting of Vec<Expr>,
3226// <https://github.com/apache/datafusion/issues/10364>
3227pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
3228    exprs: &[Expr],
3229) -> Result<String, fmt::Error> {
3230    schema_name_from_exprs_inner(exprs, ",")
3231}
3232
3233/// Formats a list of `&Expr` with a custom separator using SQL display format
3234pub struct ExprListDisplay<'a> {
3235    exprs: &'a [Expr],
3236    sep: &'a str,
3237}
3238
3239impl<'a> ExprListDisplay<'a> {
3240    /// Create a new display struct with the given expressions and separator
3241    pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
3242        Self { exprs, sep }
3243    }
3244
3245    /// Create a new display struct with comma-space separator
3246    pub fn comma_separated(exprs: &'a [Expr]) -> Self {
3247        Self::new(exprs, ", ")
3248    }
3249}
3250
3251impl Display for ExprListDisplay<'_> {
3252    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3253        let mut first = true;
3254        for expr in self.exprs {
3255            if !first {
3256                write!(f, "{}", self.sep)?;
3257            }
3258            write!(f, "{}", SqlDisplay(expr))?;
3259            first = false;
3260        }
3261        Ok(())
3262    }
3263}
3264
3265/// Get schema_name for Vector of expressions
3266pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
3267    schema_name_from_exprs_inner(exprs, ", ")
3268}
3269
3270fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
3271    let mut s = String::new();
3272    for (i, e) in exprs.iter().enumerate() {
3273        if i > 0 {
3274            write!(&mut s, "{sep}")?;
3275        }
3276        write!(&mut s, "{}", SchemaDisplay(e))?;
3277    }
3278
3279    Ok(s)
3280}
3281
3282pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
3283    let mut s = String::new();
3284    for (i, e) in sorts.iter().enumerate() {
3285        if i > 0 {
3286            write!(&mut s, ", ")?;
3287        }
3288        let ordering = if e.asc { "ASC" } else { "DESC" };
3289        let nulls_ordering = if e.nulls_first {
3290            "NULLS FIRST"
3291        } else {
3292            "NULLS LAST"
3293        };
3294        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
3295    }
3296
3297    Ok(s)
3298}
3299
/// Prefix written in front of the parenthesized column when an
/// `Expr::OuterReferenceColumn` is displayed, e.g. `outer_ref(<column>)`.
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
/// Prefix string `"UNNEST"`.
/// NOTE(review): presumably used when naming columns produced by `UNNEST`
/// expressions -- confirm against the call sites.
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
3302
3303/// Format expressions for display as part of a logical plan. In many cases, this will produce
3304/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
3305impl Display for Expr {
3306    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
3307        match self {
3308            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
3309            Expr::Column(c) => write!(f, "{c}"),
3310            Expr::OuterReferenceColumn(_, c) => {
3311                write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
3312            }
3313            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
3314            Expr::Literal(v, metadata) => {
3315                match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) {
3316                    false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()),
3317                    true => write!(f, "{v:?}"),
3318                }
3319            }
3320            Expr::Case(case) => {
3321                write!(f, "CASE ")?;
3322                if let Some(e) = &case.expr {
3323                    write!(f, "{e} ")?;
3324                }
3325                for (w, t) in &case.when_then_expr {
3326                    write!(f, "WHEN {w} THEN {t} ")?;
3327                }
3328                if let Some(e) = &case.else_expr {
3329                    write!(f, "ELSE {e} ")?;
3330                }
3331                write!(f, "END")
3332            }
3333            Expr::Cast(Cast { expr, data_type }) => {
3334                write!(f, "CAST({expr} AS {data_type})")
3335            }
3336            Expr::TryCast(TryCast { expr, data_type }) => {
3337                write!(f, "TRY_CAST({expr} AS {data_type})")
3338            }
3339            Expr::Not(expr) => write!(f, "NOT {expr}"),
3340            Expr::Negative(expr) => write!(f, "(- {expr})"),
3341            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
3342            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
3343            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
3344            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
3345            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
3346            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
3347            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
3348            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
3349            Expr::Exists(Exists {
3350                subquery,
3351                negated: true,
3352            }) => write!(f, "NOT EXISTS ({subquery:?})"),
3353            Expr::Exists(Exists {
3354                subquery,
3355                negated: false,
3356            }) => write!(f, "EXISTS ({subquery:?})"),
3357            Expr::InSubquery(InSubquery {
3358                expr,
3359                subquery,
3360                negated: true,
3361            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
3362            Expr::InSubquery(InSubquery {
3363                expr,
3364                subquery,
3365                negated: false,
3366            }) => write!(f, "{expr} IN ({subquery:?})"),
3367            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
3368            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
3369            Expr::ScalarFunction(fun) => {
3370                fmt_function(f, fun.name(), false, &fun.args, true)
3371            }
3372            Expr::WindowFunction(window_fun) => {
3373                let WindowFunction { fun, params } = window_fun.as_ref();
3374                match fun {
3375                    WindowFunctionDefinition::AggregateUDF(fun) => {
3376                        match fun.window_function_display_name(params) {
3377                            Ok(name) => {
3378                                write!(f, "{name}")
3379                            }
3380                            Err(e) => {
3381                                write!(
3382                                    f,
3383                                    "got error from window_function_display_name {e}"
3384                                )
3385                            }
3386                        }
3387                    }
3388                    WindowFunctionDefinition::WindowUDF(fun) => {
3389                        let WindowFunctionParams {
3390                            args,
3391                            partition_by,
3392                            order_by,
3393                            window_frame,
3394                            filter,
3395                            null_treatment,
3396                            distinct,
3397                        } = params;
3398
3399                        fmt_function(f, &fun.to_string(), *distinct, args, true)?;
3400
3401                        if let Some(nt) = null_treatment {
3402                            write!(f, "{nt}")?;
3403                        }
3404
3405                        if let Some(fe) = filter {
3406                            write!(f, " FILTER (WHERE {fe})")?;
3407                        }
3408
3409                        if !partition_by.is_empty() {
3410                            write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
3411                        }
3412                        if !order_by.is_empty() {
3413                            write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
3414                        }
3415                        write!(
3416                            f,
3417                            " {} BETWEEN {} AND {}",
3418                            window_frame.units,
3419                            window_frame.start_bound,
3420                            window_frame.end_bound
3421                        )
3422                    }
3423                }
3424            }
3425            Expr::AggregateFunction(AggregateFunction { func, params }) => {
3426                match func.display_name(params) {
3427                    Ok(name) => {
3428                        write!(f, "{name}")
3429                    }
3430                    Err(e) => {
3431                        write!(f, "got error from display_name {e}")
3432                    }
3433                }
3434            }
3435            Expr::Between(Between {
3436                expr,
3437                negated,
3438                low,
3439                high,
3440            }) => {
3441                if *negated {
3442                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
3443                } else {
3444                    write!(f, "{expr} BETWEEN {low} AND {high}")
3445                }
3446            }
3447            Expr::Like(Like {
3448                negated,
3449                expr,
3450                pattern,
3451                escape_char,
3452                case_insensitive,
3453            }) => {
3454                write!(f, "{expr}")?;
3455                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
3456                if *negated {
3457                    write!(f, " NOT")?;
3458                }
3459                if let Some(char) = escape_char {
3460                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
3461                } else {
3462                    write!(f, " {op_name} {pattern}")
3463                }
3464            }
3465            Expr::SimilarTo(Like {
3466                negated,
3467                expr,
3468                pattern,
3469                escape_char,
3470                case_insensitive: _,
3471            }) => {
3472                write!(f, "{expr}")?;
3473                if *negated {
3474                    write!(f, " NOT")?;
3475                }
3476                if let Some(char) = escape_char {
3477                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
3478                } else {
3479                    write!(f, " SIMILAR TO {pattern}")
3480                }
3481            }
3482            Expr::InList(InList {
3483                expr,
3484                list,
3485                negated,
3486            }) => {
3487                if *negated {
3488                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
3489                } else {
3490                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
3491                }
3492            }
3493            #[expect(deprecated)]
3494            Expr::Wildcard { qualifier, options } => match qualifier {
3495                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
3496                None => write!(f, "*{options}"),
3497            },
3498            Expr::GroupingSet(grouping_sets) => match grouping_sets {
3499                GroupingSet::Rollup(exprs) => {
3500                    // ROLLUP (c0, c1, c2)
3501                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
3502                }
3503                GroupingSet::Cube(exprs) => {
3504                    // CUBE (c0, c1, c2)
3505                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
3506                }
3507                GroupingSet::GroupingSets(lists_of_exprs) => {
3508                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
3509                    write!(
3510                        f,
3511                        "GROUPING SETS ({})",
3512                        lists_of_exprs
3513                            .iter()
3514                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
3515                            .collect::<Vec<String>>()
3516                            .join(", ")
3517                    )
3518                }
3519            },
3520            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
3521            Expr::Unnest(Unnest { expr }) => {
3522                write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
3523            }
3524            Expr::Lambda(Lambda {
3525                params,
3526                body,
3527            }) => {
3528                write!(f, "({}) -> {body}", params.join(", "))
3529            }
3530        }
3531    }
3532}
3533
3534fn fmt_function(
3535    f: &mut Formatter,
3536    fun: &str,
3537    distinct: bool,
3538    args: &[Expr],
3539    display: bool,
3540) -> fmt::Result {
3541    let args: Vec<String> = match display {
3542        true => args.iter().map(|arg| format!("{arg}")).collect(),
3543        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
3544    };
3545
3546    let distinct_str = match distinct {
3547        true => "DISTINCT ",
3548        false => "",
3549    };
3550    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
3551}
3552
3553/// The name of the column (field) that this `Expr` will produce in the physical plan.
3554/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
3555pub fn physical_name(expr: &Expr) -> Result<String> {
3556    match expr {
3557        Expr::Column(col) => Ok(col.name.clone()),
3558        Expr::Alias(alias) => Ok(alias.name.clone()),
3559        _ => Ok(expr.schema_name().to_string()),
3560    }
3561}
3562
3563#[cfg(test)]
3564mod test {
3565    use crate::expr_fn::col;
3566    use crate::{
3567        case, lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options,
3568        ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
3569    };
3570    use arrow::datatypes::{Field, Schema};
3571    use sqlparser::ast;
3572    use sqlparser::ast::{Ident, IdentWithAlias};
3573    use std::any::Any;
3574
3575    #[test]
3576    fn infer_placeholder_in_clause() {
3577        // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
3578        let column = col("department_id");
3579        let param_placeholders = vec![
3580            Expr::Placeholder(Placeholder {
3581                id: "$1".to_string(),
3582                field: None,
3583            }),
3584            Expr::Placeholder(Placeholder {
3585                id: "$2".to_string(),
3586                field: None,
3587            }),
3588            Expr::Placeholder(Placeholder {
3589                id: "$3".to_string(),
3590                field: None,
3591            }),
3592        ];
3593        let in_list = Expr::InList(InList {
3594            expr: Box::new(column),
3595            list: param_placeholders,
3596            negated: false,
3597        });
3598
3599        let schema = Arc::new(Schema::new(vec![
3600            Field::new("name", DataType::Utf8, true),
3601            Field::new("department_id", DataType::Int32, true),
3602        ]));
3603        let df_schema = DFSchema::try_from(schema).unwrap();
3604
3605        let (inferred_expr, contains_placeholder) =
3606            in_list.infer_placeholder_types(&df_schema).unwrap();
3607
3608        assert!(contains_placeholder);
3609
3610        match inferred_expr {
3611            Expr::InList(in_list) => {
3612                for expr in in_list.list {
3613                    match expr {
3614                        Expr::Placeholder(placeholder) => {
3615                            assert_eq!(
3616                                placeholder.field.unwrap().data_type(),
3617                                &DataType::Int32,
3618                                "Placeholder {} should infer Int32",
3619                                placeholder.id
3620                            );
3621                        }
3622                        _ => panic!("Expected Placeholder expression"),
3623                    }
3624                }
3625            }
3626            _ => panic!("Expected InList expression"),
3627        }
3628    }
3629
3630    #[test]
3631    fn infer_placeholder_like_and_similar_to() {
3632        // name LIKE $1
3633        let schema =
3634            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
3635        let df_schema = DFSchema::try_from(schema).unwrap();
3636
3637        let like = Like {
3638            expr: Box::new(col("name")),
3639            pattern: Box::new(Expr::Placeholder(Placeholder {
3640                id: "$1".to_string(),
3641                field: None,
3642            })),
3643            negated: false,
3644            case_insensitive: false,
3645            escape_char: None,
3646        };
3647
3648        let expr = Expr::Like(like.clone());
3649
3650        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3651        match inferred_expr {
3652            Expr::Like(like) => match *like.pattern {
3653                Expr::Placeholder(placeholder) => {
3654                    assert_eq!(placeholder.field.unwrap().data_type(), &DataType::Utf8);
3655                }
3656                _ => panic!("Expected Placeholder"),
3657            },
3658            _ => panic!("Expected Like"),
3659        }
3660
3661        // name SIMILAR TO $1
3662        let expr = Expr::SimilarTo(like);
3663
3664        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3665        match inferred_expr {
3666            Expr::SimilarTo(like) => match *like.pattern {
3667                Expr::Placeholder(placeholder) => {
3668                    assert_eq!(
3669                        placeholder.field.unwrap().data_type(),
3670                        &DataType::Utf8,
3671                        "Placeholder {} should infer Utf8",
3672                        placeholder.id
3673                    );
3674                }
3675                _ => panic!("Expected Placeholder expression"),
3676            },
3677            _ => panic!("Expected SimilarTo expression"),
3678        }
3679    }
3680
3681    #[test]
3682    fn infer_placeholder_with_metadata() {
3683        // name == $1, where name is a non-nullable string
3684        let schema =
3685            Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, false)
3686                .with_metadata(
3687                    [("some_key".to_string(), "some_value".to_string())].into(),
3688                )]));
3689        let df_schema = DFSchema::try_from(schema).unwrap();
3690
3691        let expr = binary_expr(col("name"), Operator::Eq, placeholder("$1"));
3692
3693        let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap();
3694        match inferred_expr {
3695            Expr::BinaryExpr(BinaryExpr { right, .. }) => match *right {
3696                Expr::Placeholder(placeholder) => {
3697                    assert_eq!(
3698                        placeholder.field.as_ref().unwrap().data_type(),
3699                        &DataType::Utf8
3700                    );
3701                    assert_eq!(
3702                        placeholder.field.as_ref().unwrap().metadata(),
3703                        df_schema.field(0).metadata()
3704                    );
3705                    // Inferred placeholder should still be nullable
3706                    assert!(placeholder.field.as_ref().unwrap().is_nullable());
3707                }
3708                _ => panic!("Expected Placeholder"),
3709            },
3710            _ => panic!("Expected BinaryExpr"),
3711        }
3712    }
3713
3714    #[test]
3715    fn format_case_when() -> Result<()> {
3716        let expr = case(col("a"))
3717            .when(lit(1), lit(true))
3718            .when(lit(0), lit(false))
3719            .otherwise(lit(ScalarValue::Null))?;
3720        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";
3721        assert_eq!(expected, format!("{expr}"));
3722        Ok(())
3723    }
3724
3725    #[test]
3726    fn format_cast() -> Result<()> {
3727        let expr = Expr::Cast(Cast {
3728            expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)), None)),
3729            data_type: DataType::Utf8,
3730        });
3731        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
3732        assert_eq!(expected_canonical, format!("{expr}"));
3733        // Note that CAST intentionally has a name that is different from its `Display`
3734        // representation. CAST does not change the name of expressions.
3735        assert_eq!("Float32(1.23)", expr.schema_name().to_string());
3736        Ok(())
3737    }
3738
3739    #[test]
3740    fn test_partial_ord() {
3741        // Test validates that partial ord is defined for Expr, not
3742        // intended to exhaustively test all possibilities
3743        let exp1 = col("a") + lit(1);
3744        let exp2 = col("a") + lit(2);
3745        let exp3 = !(col("a") + lit(2));
3746
3747        assert!(exp1 < exp2);
3748        assert!(exp3 > exp2);
3749        assert!(exp1 < exp3)
3750    }
3751
3752    #[test]
3753    fn test_collect_expr() -> Result<()> {
3754        // single column
3755        {
3756            let expr = &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
3757            let columns = expr.column_refs();
3758            assert_eq!(1, columns.len());
3759            assert!(columns.contains(&Column::from_name("a")));
3760        }
3761
3762        // multiple columns
3763        {
3764            let expr = col("a") + col("b") + lit(1);
3765            let columns = expr.column_refs();
3766            assert_eq!(2, columns.len());
3767            assert!(columns.contains(&Column::from_name("a")));
3768            assert!(columns.contains(&Column::from_name("b")));
3769        }
3770
3771        Ok(())
3772    }
3773
3774    #[test]
3775    fn test_logical_ops() {
3776        assert_eq!(
3777            format!("{}", lit(1u32).eq(lit(2u32))),
3778            "UInt32(1) = UInt32(2)"
3779        );
3780        assert_eq!(
3781            format!("{}", lit(1u32).not_eq(lit(2u32))),
3782            "UInt32(1) != UInt32(2)"
3783        );
3784        assert_eq!(
3785            format!("{}", lit(1u32).gt(lit(2u32))),
3786            "UInt32(1) > UInt32(2)"
3787        );
3788        assert_eq!(
3789            format!("{}", lit(1u32).gt_eq(lit(2u32))),
3790            "UInt32(1) >= UInt32(2)"
3791        );
3792        assert_eq!(
3793            format!("{}", lit(1u32).lt(lit(2u32))),
3794            "UInt32(1) < UInt32(2)"
3795        );
3796        assert_eq!(
3797            format!("{}", lit(1u32).lt_eq(lit(2u32))),
3798            "UInt32(1) <= UInt32(2)"
3799        );
3800        assert_eq!(
3801            format!("{}", lit(1u32).and(lit(2u32))),
3802            "UInt32(1) AND UInt32(2)"
3803        );
3804        assert_eq!(
3805            format!("{}", lit(1u32).or(lit(2u32))),
3806            "UInt32(1) OR UInt32(2)"
3807        );
3808    }
3809
3810    #[test]
3811    fn test_is_volatile_scalar_func() {
3812        // UDF
3813        #[derive(Debug, PartialEq, Eq, Hash)]
3814        struct TestScalarUDF {
3815            signature: Signature,
3816        }
3817        impl ScalarUDFImpl for TestScalarUDF {
3818            fn as_any(&self) -> &dyn Any {
3819                self
3820            }
3821            fn name(&self) -> &str {
3822                "TestScalarUDF"
3823            }
3824
3825            fn signature(&self) -> &Signature {
3826                &self.signature
3827            }
3828
3829            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
3830                Ok(DataType::Utf8)
3831            }
3832
3833            fn invoke_with_args(
3834                &self,
3835                _args: ScalarFunctionArgs,
3836            ) -> Result<ColumnarValue> {
3837                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
3838            }
3839        }
3840        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3841            signature: Signature::uniform(1, vec![DataType::Float32], Volatility::Stable),
3842        }));
3843        assert_ne!(udf.signature().volatility, Volatility::Volatile);
3844
3845        let udf = Arc::new(ScalarUDF::from(TestScalarUDF {
3846            signature: Signature::uniform(
3847                1,
3848                vec![DataType::Float32],
3849                Volatility::Volatile,
3850            ),
3851        }));
3852        assert_eq!(udf.signature().volatility, Volatility::Volatile);
3853    }
3854
3855    use super::*;
3856
3857    #[test]
3858    fn test_display_wildcard() {
3859        assert_eq!(format!("{}", wildcard()), "*");
3860        assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*");
3861        assert_eq!(
3862            format!(
3863                "{}",
3864                wildcard_with_options(wildcard_options(
3865                    Some(IlikeSelectItem {
3866                        pattern: "c1".to_string()
3867                    }),
3868                    None,
3869                    None,
3870                    None,
3871                    None
3872                ))
3873            ),
3874            "* ILIKE 'c1'"
3875        );
3876        assert_eq!(
3877            format!(
3878                "{}",
3879                wildcard_with_options(wildcard_options(
3880                    None,
3881                    Some(ExcludeSelectItem::Multiple(vec![
3882                        Ident::from("c1"),
3883                        Ident::from("c2")
3884                    ])),
3885                    None,
3886                    None,
3887                    None
3888                ))
3889            ),
3890            "* EXCLUDE (c1, c2)"
3891        );
3892        assert_eq!(
3893            format!(
3894                "{}",
3895                wildcard_with_options(wildcard_options(
3896                    None,
3897                    None,
3898                    Some(ExceptSelectItem {
3899                        first_element: Ident::from("c1"),
3900                        additional_elements: vec![Ident::from("c2")]
3901                    }),
3902                    None,
3903                    None
3904                ))
3905            ),
3906            "* EXCEPT (c1, c2)"
3907        );
3908        assert_eq!(
3909            format!(
3910                "{}",
3911                wildcard_with_options(wildcard_options(
3912                    None,
3913                    None,
3914                    None,
3915                    Some(PlannedReplaceSelectItem {
3916                        items: vec![ReplaceSelectElement {
3917                            expr: ast::Expr::Identifier(Ident::from("c1")),
3918                            column_name: Ident::from("a1"),
3919                            as_keyword: false
3920                        }],
3921                        planned_expressions: vec![]
3922                    }),
3923                    None
3924                ))
3925            ),
3926            "* REPLACE (c1 a1)"
3927        );
3928        assert_eq!(
3929            format!(
3930                "{}",
3931                wildcard_with_options(wildcard_options(
3932                    None,
3933                    None,
3934                    None,
3935                    None,
3936                    Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
3937                        ident: Ident::from("c1"),
3938                        alias: Ident::from("a1")
3939                    }]))
3940                ))
3941            ),
3942            "* RENAME (c1 AS a1)"
3943        )
3944    }
3945
3946    #[test]
3947    fn test_schema_display_alias_with_relation() {
3948        assert_eq!(
3949            format!(
3950                "{}",
3951                SchemaDisplay(
3952                    &lit(1).alias_qualified("table_name".into(), "column_name")
3953                )
3954            ),
3955            "table_name.column_name"
3956        );
3957    }
3958
3959    #[test]
3960    fn test_schema_display_alias_without_relation() {
3961        assert_eq!(
3962            format!(
3963                "{}",
3964                SchemaDisplay(&lit(1).alias_qualified(None::<&str>, "column_name"))
3965            ),
3966            "column_name"
3967        );
3968    }
3969
3970    fn wildcard_options(
3971        opt_ilike: Option<IlikeSelectItem>,
3972        opt_exclude: Option<ExcludeSelectItem>,
3973        opt_except: Option<ExceptSelectItem>,
3974        opt_replace: Option<PlannedReplaceSelectItem>,
3975        opt_rename: Option<RenameSelectItem>,
3976    ) -> WildcardOptions {
3977        WildcardOptions {
3978            ilike: opt_ilike,
3979            exclude: opt_exclude,
3980            except: opt_except,
3981            replace: opt_replace,
3982            rename: opt_rename,
3983        }
3984    }
3985
3986    #[test]
3987    fn test_size_of_expr() {
3988        // because Expr is such a widely used struct in DataFusion
3989        // it is important to keep its size as small as possible
3990        //
3991        // If this test fails when you change `Expr`, please try
3992        // `Box`ing the fields to make `Expr` smaller
3993        // See https://github.com/apache/datafusion/issues/16199 for details
3994        assert_eq!(size_of::<Expr>(), 112);
3995        assert_eq!(size_of::<ScalarValue>(), 64);
3996        assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
3997        assert_eq!(size_of::<Vec<Expr>>(), 24);
3998        assert_eq!(size_of::<Arc<Expr>>(), 8);
3999    }
4000
4001    #[test]
4002    fn test_accept_exprs() {
4003        fn accept_exprs<E: AsRef<Expr>>(_: &[E]) {}
4004
4005        let expr = || -> Expr { lit(1) };
4006
4007        // Call accept_exprs with owned expressions
4008        let owned_exprs = vec![expr(), expr()];
4009        accept_exprs(&owned_exprs);
4010
4011        // Call accept_exprs with expressions from expr tree
4012        let udf = Expr::ScalarFunction(ScalarFunction {
4013            func: Arc::new(ScalarUDF::new_from_impl(TestUDF {})),
4014            args: vec![expr(), expr()],
4015        });
4016        let Expr::ScalarFunction(scalar) = &udf else {
4017            unreachable!()
4018        };
4019        accept_exprs(&scalar.args);
4020
4021        // Call accept_exprs with expressions collected from expr tree, without cloning
4022        let mut collected_refs: Vec<&Expr> = scalar.args.iter().collect();
4023        collected_refs.extend(&owned_exprs);
4024        accept_exprs(&collected_refs);
4025
4026        // test helpers
4027        #[derive(Debug, PartialEq, Eq, Hash)]
4028        struct TestUDF {}
4029        impl ScalarUDFImpl for TestUDF {
4030            fn as_any(&self) -> &dyn Any {
4031                unimplemented!()
4032            }
4033
4034            fn name(&self) -> &str {
4035                unimplemented!()
4036            }
4037
4038            fn signature(&self) -> &Signature {
4039                unimplemented!()
4040            }
4041
4042            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4043                unimplemented!()
4044            }
4045
4046            fn invoke_with_args(
4047                &self,
4048                _args: ScalarFunctionArgs,
4049            ) -> Result<ColumnarValue> {
4050                unimplemented!()
4051            }
4052        }
4053    }
4054}