datafusion_sql/expr/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::{DataType, TimeUnit};
19use datafusion_expr::planner::{
20    PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
21};
22use sqlparser::ast::{
23    AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind,
24    DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr,
25    ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, TrimWhereField,
26    TypedString, Value, ValueWithSpan,
27};
28
29use datafusion_common::{
30    internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
31    ScalarValue,
32};
33
34use datafusion_expr::expr::ScalarFunction;
35use datafusion_expr::expr::{InList, WildcardOptions};
36use datafusion_expr::{
37    lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal,
38    Operator, TryCast,
39};
40
41use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
42
43mod binary_op;
44mod function;
45mod grouping_set;
46mod identifier;
47mod order_by;
48mod subquery;
49mod substring;
50mod unary_op;
51mod value;
52
53impl<S: ContextProvider> SqlToRel<'_, S> {
54    pub(crate) fn sql_expr_to_logical_expr_with_alias(
55        &self,
56        sql: SQLExprWithAlias,
57        schema: &DFSchema,
58        planner_context: &mut PlannerContext,
59    ) -> Result<Expr> {
60        let mut expr =
61            self.sql_expr_to_logical_expr(sql.expr, schema, planner_context)?;
62        if let Some(alias) = sql.alias {
63            expr = expr.alias(alias.value);
64        }
65        Ok(expr)
66    }
67    pub(crate) fn sql_expr_to_logical_expr(
68        &self,
69        sql: SQLExpr,
70        schema: &DFSchema,
71        planner_context: &mut PlannerContext,
72    ) -> Result<Expr> {
73        enum StackEntry {
74            SQLExpr(Box<SQLExpr>),
75            Operator(BinaryOperator),
76        }
77
78        // Virtual stack machine to convert SQLExpr to Expr
79        // This allows visiting the expr tree in a depth-first manner which
80        // produces expressions in postfix notations, i.e. `a + b` => `a b +`.
81        // See https://github.com/apache/datafusion/issues/1444
82        let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
83        let mut eval_stack = vec![];
84
85        while let Some(entry) = stack.pop() {
86            match entry {
87                StackEntry::SQLExpr(sql_expr) => {
88                    match *sql_expr {
89                        SQLExpr::BinaryOp { left, op, right } => {
90                            // Note the order that we push the entries to the stack
91                            // is important. We want to visit the left node first.
92                            stack.push(StackEntry::Operator(op));
93                            stack.push(StackEntry::SQLExpr(right));
94                            stack.push(StackEntry::SQLExpr(left));
95                        }
96                        _ => {
97                            let expr = self.sql_expr_to_logical_expr_internal(
98                                *sql_expr,
99                                schema,
100                                planner_context,
101                            )?;
102                            eval_stack.push(expr);
103                        }
104                    }
105                }
106                StackEntry::Operator(op) => {
107                    let right = eval_stack.pop().unwrap();
108                    let left = eval_stack.pop().unwrap();
109                    let expr = self.build_logical_expr(op, left, right, schema)?;
110                    eval_stack.push(expr);
111                }
112            }
113        }
114
115        assert_eq!(1, eval_stack.len());
116        let expr = eval_stack.pop().unwrap();
117        Ok(expr)
118    }
119
120    fn build_logical_expr(
121        &self,
122        op: BinaryOperator,
123        left: Expr,
124        right: Expr,
125        schema: &DFSchema,
126    ) -> Result<Expr> {
127        // try extension planers
128        let mut binary_expr = RawBinaryExpr { op, left, right };
129        for planner in self.context_provider.get_expr_planners() {
130            match planner.plan_binary_op(binary_expr, schema)? {
131                PlannerResult::Planned(expr) => {
132                    return Ok(expr);
133                }
134                PlannerResult::Original(expr) => {
135                    binary_expr = expr;
136                }
137            }
138        }
139
140        let RawBinaryExpr { op, left, right } = binary_expr;
141        Ok(Expr::BinaryExpr(BinaryExpr::new(
142            Box::new(left),
143            self.parse_sql_binary_op(op)?,
144            Box::new(right),
145        )))
146    }
147
148    pub fn sql_to_expr_with_alias(
149        &self,
150        sql: SQLExprWithAlias,
151        schema: &DFSchema,
152        planner_context: &mut PlannerContext,
153    ) -> Result<Expr> {
154        let mut expr =
155            self.sql_expr_to_logical_expr_with_alias(sql, schema, planner_context)?;
156        expr = self.rewrite_partial_qualifier(expr, schema);
157        self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
158        let (expr, _) = expr.infer_placeholder_types(schema)?;
159        Ok(expr)
160    }
161
162    /// Generate a relational expression from a SQL expression
163    pub fn sql_to_expr(
164        &self,
165        sql: SQLExpr,
166        schema: &DFSchema,
167        planner_context: &mut PlannerContext,
168    ) -> Result<Expr> {
169        // The location of the original SQL expression in the source code
170        let mut expr = self.sql_expr_to_logical_expr(sql, schema, planner_context)?;
171        expr = self.rewrite_partial_qualifier(expr, schema);
172        self.validate_schema_satisfies_exprs(schema, std::slice::from_ref(&expr))?;
173        let (expr, _) = expr.infer_placeholder_types(schema)?;
174        Ok(expr)
175    }
176
177    /// Rewrite aliases which are not-complete (e.g. ones that only include only table qualifier in a schema.table qualified relation)
178    fn rewrite_partial_qualifier(&self, expr: Expr, schema: &DFSchema) -> Expr {
179        match expr {
180            Expr::Column(col) => match &col.relation {
181                Some(q) => {
182                    match schema.iter().find(|(qualifier, field)| match qualifier {
183                        Some(field_q) => {
184                            field.name() == &col.name
185                                && field_q.to_string().ends_with(&format!(".{q}"))
186                        }
187                        _ => false,
188                    }) {
189                        Some((qualifier, df_field)) => Expr::from((qualifier, df_field)),
190                        None => Expr::Column(col),
191                    }
192                }
193                None => Expr::Column(col),
194            },
195            _ => expr,
196        }
197    }
198
199    /// Internal implementation. Use
200    /// [`Self::sql_expr_to_logical_expr`] to plan exprs.
201    #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
202    fn sql_expr_to_logical_expr_internal(
203        &self,
204        sql: SQLExpr,
205        schema: &DFSchema,
206        planner_context: &mut PlannerContext,
207    ) -> Result<Expr> {
208        // NOTE: This function is called recursively, so each match arm body should be as
209        //       small as possible to decrease stack requirement.
210        //       Follow the common pattern of extracting into a separate function for
211        //       non-trivial arms. See https://github.com/apache/datafusion/pull/12384 for
212        //       more context.
213        match sql {
214            SQLExpr::Value(value) => {
215                self.parse_value(value.into(), planner_context.prepare_param_data_types())
216            }
217            SQLExpr::Extract { field, expr, .. } => {
218                let mut extract_args = vec![
219                    Expr::Literal(ScalarValue::from(format!("{field}")), None),
220                    self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
221                ];
222
223                for planner in self.context_provider.get_expr_planners() {
224                    match planner.plan_extract(extract_args)? {
225                        PlannerResult::Planned(expr) => return Ok(expr),
226                        PlannerResult::Original(args) => {
227                            extract_args = args;
228                        }
229                    }
230                }
231
232                not_impl_err!("Extract not supported by ExprPlanner: {extract_args:?}")
233            }
234
235            SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema),
236            SQLExpr::Interval(interval) => self.sql_interval_to_expr(false, interval),
237            SQLExpr::Identifier(id) => {
238                self.sql_identifier_to_expr(id, schema, planner_context)
239            }
240
241            // <expr>["foo"], <expr>[4] or <expr>[4:5]
242            SQLExpr::CompoundFieldAccess { root, access_chain } => self
243                .sql_compound_field_access_to_expr(
244                    *root,
245                    access_chain,
246                    schema,
247                    planner_context,
248                ),
249
250            SQLExpr::CompoundIdentifier(ids) => {
251                self.sql_compound_identifier_to_expr(ids, schema, planner_context)
252            }
253
254            SQLExpr::Case {
255                operand,
256                conditions,
257                else_result,
258                case_token: _,
259                end_token: _,
260            } => self.sql_case_identifier_to_expr(
261                operand,
262                conditions,
263                else_result,
264                schema,
265                planner_context,
266            ),
267
268            SQLExpr::Cast {
269                kind: CastKind::Cast | CastKind::DoubleColon,
270                expr,
271                data_type,
272                format,
273            } => self.sql_cast_to_expr(*expr, data_type, format, schema, planner_context),
274
275            SQLExpr::Cast {
276                kind: CastKind::TryCast | CastKind::SafeCast,
277                expr,
278                data_type,
279                format,
280            } => {
281                if let Some(format) = format {
282                    return not_impl_err!("CAST with format is not supported: {format}");
283                }
284
285                Ok(Expr::TryCast(TryCast::new(
286                    Box::new(self.sql_expr_to_logical_expr(
287                        *expr,
288                        schema,
289                        planner_context,
290                    )?),
291                    self.convert_data_type_to_field(&data_type)?
292                        .data_type()
293                        .clone(),
294                )))
295            }
296
297            SQLExpr::TypedString(TypedString {
298                data_type,
299                value,
300                uses_odbc_syntax: _,
301            }) => Ok(Expr::Cast(Cast::new(
302                Box::new(lit(value.into_string().unwrap())),
303                self.convert_data_type_to_field(&data_type)?
304                    .data_type()
305                    .clone(),
306            ))),
307
308            SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
309                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
310            ))),
311
312            SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new(
313                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
314            ))),
315
316            SQLExpr::IsDistinctFrom(left, right) => {
317                Ok(Expr::BinaryExpr(BinaryExpr::new(
318                    Box::new(self.sql_expr_to_logical_expr(
319                        *left,
320                        schema,
321                        planner_context,
322                    )?),
323                    Operator::IsDistinctFrom,
324                    Box::new(self.sql_expr_to_logical_expr(
325                        *right,
326                        schema,
327                        planner_context,
328                    )?),
329                )))
330            }
331
332            SQLExpr::IsNotDistinctFrom(left, right) => {
333                Ok(Expr::BinaryExpr(BinaryExpr::new(
334                    Box::new(self.sql_expr_to_logical_expr(
335                        *left,
336                        schema,
337                        planner_context,
338                    )?),
339                    Operator::IsNotDistinctFrom,
340                    Box::new(self.sql_expr_to_logical_expr(
341                        *right,
342                        schema,
343                        planner_context,
344                    )?),
345                )))
346            }
347
348            SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new(
349                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
350            ))),
351
352            SQLExpr::IsFalse(expr) => Ok(Expr::IsFalse(Box::new(
353                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
354            ))),
355
356            SQLExpr::IsNotTrue(expr) => Ok(Expr::IsNotTrue(Box::new(
357                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
358            ))),
359
360            SQLExpr::IsNotFalse(expr) => Ok(Expr::IsNotFalse(Box::new(
361                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
362            ))),
363
364            SQLExpr::IsUnknown(expr) => Ok(Expr::IsUnknown(Box::new(
365                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
366            ))),
367
368            SQLExpr::IsNotUnknown(expr) => Ok(Expr::IsNotUnknown(Box::new(
369                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
370            ))),
371
372            SQLExpr::UnaryOp { op, expr } => {
373                self.parse_sql_unary_op(op, *expr, schema, planner_context)
374            }
375
376            SQLExpr::Between {
377                expr,
378                negated,
379                low,
380                high,
381            } => Ok(Expr::Between(Between::new(
382                Box::new(self.sql_expr_to_logical_expr(
383                    *expr,
384                    schema,
385                    planner_context,
386                )?),
387                negated,
388                Box::new(self.sql_expr_to_logical_expr(*low, schema, planner_context)?),
389                Box::new(self.sql_expr_to_logical_expr(
390                    *high,
391                    schema,
392                    planner_context,
393                )?),
394            ))),
395
396            SQLExpr::InList {
397                expr,
398                list,
399                negated,
400            } => self.sql_in_list_to_expr(*expr, list, negated, schema, planner_context),
401
402            SQLExpr::Like {
403                negated,
404                expr,
405                pattern,
406                escape_char,
407                any,
408            } => self.sql_like_to_expr(
409                negated,
410                *expr,
411                *pattern,
412                escape_char,
413                schema,
414                planner_context,
415                false,
416                any,
417            ),
418
419            SQLExpr::ILike {
420                negated,
421                expr,
422                pattern,
423                escape_char,
424                any,
425            } => self.sql_like_to_expr(
426                negated,
427                *expr,
428                *pattern,
429                escape_char,
430                schema,
431                planner_context,
432                true,
433                any,
434            ),
435
436            SQLExpr::SimilarTo {
437                negated,
438                expr,
439                pattern,
440                escape_char,
441            } => self.sql_similarto_to_expr(
442                negated,
443                *expr,
444                *pattern,
445                escape_char,
446                schema,
447                planner_context,
448            ),
449
450            SQLExpr::BinaryOp { .. } => {
451                internal_err!("binary_op should be handled by sql_expr_to_logical_expr.")
452            }
453
454            #[cfg(feature = "unicode_expressions")]
455            SQLExpr::Substring {
456                expr,
457                substring_from,
458                substring_for,
459                special: _,
460                shorthand: _,
461            } => self.sql_substring_to_expr(
462                expr,
463                substring_from,
464                substring_for,
465                schema,
466                planner_context,
467            ),
468
469            #[cfg(not(feature = "unicode_expressions"))]
470            SQLExpr::Substring { .. } => {
471                internal_err!(
472                    "statement substring requires compilation with feature flag: unicode_expressions."
473                )
474            }
475
476            SQLExpr::Trim {
477                expr,
478                trim_where,
479                trim_what,
480                trim_characters,
481            } => self.sql_trim_to_expr(
482                *expr,
483                trim_where,
484                trim_what,
485                trim_characters,
486                schema,
487                planner_context,
488            ),
489
490            SQLExpr::Function(function) => {
491                self.sql_function_to_expr(function, schema, planner_context)
492            }
493
494            SQLExpr::Rollup(exprs) => {
495                self.sql_rollup_to_expr(exprs, schema, planner_context)
496            }
497            SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
498            SQLExpr::GroupingSets(exprs) => {
499                self.sql_grouping_sets_to_expr(exprs, schema, planner_context)
500            }
501
502            SQLExpr::Floor { expr, field } => match field {
503                CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) => {
504                    self.sql_fn_name_to_expr(*expr, "floor", schema, planner_context)
505                }
506                CeilFloorKind::DateTimeField(_) => {
507                    not_impl_err!("FLOOR with datetime is not supported")
508                }
509                CeilFloorKind::Scale(_) => {
510                    not_impl_err!("FLOOR with scale is not supported")
511                }
512            },
513            SQLExpr::Ceil { expr, field } => match field {
514                CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) => {
515                    self.sql_fn_name_to_expr(*expr, "ceil", schema, planner_context)
516                }
517                CeilFloorKind::DateTimeField(_) => {
518                    not_impl_err!("CEIL with datetime is not supported")
519                }
520                CeilFloorKind::Scale(_) => {
521                    not_impl_err!("CEIL with scale is not supported")
522                }
523            },
524            SQLExpr::Overlay {
525                expr,
526                overlay_what,
527                overlay_from,
528                overlay_for,
529            } => self.sql_overlay_to_expr(
530                *expr,
531                *overlay_what,
532                *overlay_from,
533                overlay_for,
534                schema,
535                planner_context,
536            ),
537            SQLExpr::Nested(e) => {
538                self.sql_expr_to_logical_expr(*e, schema, planner_context)
539            }
540
541            SQLExpr::Exists { subquery, negated } => {
542                self.parse_exists_subquery(*subquery, negated, schema, planner_context)
543            }
544            SQLExpr::InSubquery {
545                expr,
546                subquery,
547                negated,
548            } => {
549                self.parse_in_subquery(*expr, *subquery, negated, schema, planner_context)
550            }
551            SQLExpr::Subquery(subquery) => {
552                self.parse_scalar_subquery(*subquery, schema, planner_context)
553            }
554
555            SQLExpr::Struct { values, fields } => {
556                self.parse_struct(schema, planner_context, values, fields)
557            }
558            SQLExpr::Position { expr, r#in } => {
559                self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
560            }
561            SQLExpr::AtTimeZone {
562                timestamp,
563                time_zone,
564            } => Ok(Expr::Cast(Cast::new(
565                Box::new(self.sql_expr_to_logical_expr_internal(
566                    *timestamp,
567                    schema,
568                    planner_context,
569                )?),
570                match *time_zone {
571                    SQLExpr::Value(ValueWithSpan {
572                        value: Value::SingleQuotedString(s),
573                        span: _,
574                    }) => DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())),
575                    _ => {
576                        return not_impl_err!(
577                            "Unsupported ast node in sqltorel: {time_zone:?}"
578                        )
579                    }
580                },
581            ))),
582            SQLExpr::Dictionary(fields) => {
583                self.try_plan_dictionary_literal(fields, schema, planner_context)
584            }
585            SQLExpr::Map(map) => {
586                self.try_plan_map_literal(map.entries, schema, planner_context)
587            }
588            SQLExpr::AnyOp {
589                left,
590                compare_op,
591                right,
592                // ANY/SOME are equivalent, this field specifies which the user
593                // specified but it doesn't affect the plan so ignore the field
594                is_some: _,
595            } => {
596                let mut binary_expr = RawBinaryExpr {
597                    op: compare_op,
598                    left: self.sql_expr_to_logical_expr(
599                        *left,
600                        schema,
601                        planner_context,
602                    )?,
603                    right: self.sql_expr_to_logical_expr(
604                        *right,
605                        schema,
606                        planner_context,
607                    )?,
608                };
609                for planner in self.context_provider.get_expr_planners() {
610                    match planner.plan_any(binary_expr)? {
611                        PlannerResult::Planned(expr) => {
612                            return Ok(expr);
613                        }
614                        PlannerResult::Original(expr) => {
615                            binary_expr = expr;
616                        }
617                    }
618                }
619                not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
620            }
621            #[expect(deprecated)]
622            SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
623                qualifier: None,
624                options: Box::new(WildcardOptions::default()),
625            }),
626            #[expect(deprecated)]
627            SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard {
628                qualifier: Some(self.object_name_to_table_reference(object_name)?),
629                options: Box::new(WildcardOptions::default()),
630            }),
631            SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
632            _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
633        }
634    }
635
636    /// Parses a struct(..) expression and plans it creation
637    fn parse_struct(
638        &self,
639        schema: &DFSchema,
640        planner_context: &mut PlannerContext,
641        values: Vec<SQLExpr>,
642        fields: Vec<StructField>,
643    ) -> Result<Expr> {
644        if !fields.is_empty() {
645            return not_impl_err!("Struct fields are not supported yet");
646        }
647        let is_named_struct = values
648            .iter()
649            .any(|value| matches!(value, SQLExpr::Named { .. }));
650
651        let mut create_struct_args = if is_named_struct {
652            self.create_named_struct_expr(values, schema, planner_context)?
653        } else {
654            self.create_struct_expr(values, schema, planner_context)?
655        };
656
657        for planner in self.context_provider.get_expr_planners() {
658            match planner.plan_struct_literal(create_struct_args, is_named_struct)? {
659                PlannerResult::Planned(expr) => return Ok(expr),
660                PlannerResult::Original(args) => create_struct_args = args,
661            }
662        }
663        not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}")
664    }
665
666    fn parse_tuple(
667        &self,
668        schema: &DFSchema,
669        planner_context: &mut PlannerContext,
670        values: Vec<SQLExpr>,
671    ) -> Result<Expr> {
672        match values.first() {
673            Some(SQLExpr::Identifier(_))
674            | Some(SQLExpr::Value(_))
675            | Some(SQLExpr::CompoundIdentifier(_)) => {
676                self.parse_struct(schema, planner_context, values, vec![])
677            }
678            None => not_impl_err!("Empty tuple not supported yet"),
679            _ => {
680                not_impl_err!("Only identifiers and literals are supported in tuples")
681            }
682        }
683    }
684
685    fn sql_position_to_expr(
686        &self,
687        substr_expr: SQLExpr,
688        str_expr: SQLExpr,
689        schema: &DFSchema,
690        planner_context: &mut PlannerContext,
691    ) -> Result<Expr> {
692        let substr =
693            self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?;
694        let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?;
695        let mut position_args = vec![fullstr, substr];
696        for planner in self.context_provider.get_expr_planners() {
697            match planner.plan_position(position_args)? {
698                PlannerResult::Planned(expr) => return Ok(expr),
699                PlannerResult::Original(args) => {
700                    position_args = args;
701                }
702            }
703        }
704
705        not_impl_err!("Position not supported by ExprPlanner: {position_args:?}")
706    }
707
708    fn try_plan_dictionary_literal(
709        &self,
710        fields: Vec<DictionaryField>,
711        schema: &DFSchema,
712        planner_context: &mut PlannerContext,
713    ) -> Result<Expr> {
714        let mut keys = vec![];
715        let mut values = vec![];
716        for field in fields {
717            let key = lit(field.key.value);
718            let value =
719                self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?;
720            keys.push(key);
721            values.push(value);
722        }
723
724        let mut raw_expr = RawDictionaryExpr { keys, values };
725
726        for planner in self.context_provider.get_expr_planners() {
727            match planner.plan_dictionary_literal(raw_expr, schema)? {
728                PlannerResult::Planned(expr) => {
729                    return Ok(expr);
730                }
731                PlannerResult::Original(expr) => raw_expr = expr,
732            }
733        }
734        not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}")
735    }
736
737    fn try_plan_map_literal(
738        &self,
739        entries: Vec<MapEntry>,
740        schema: &DFSchema,
741        planner_context: &mut PlannerContext,
742    ) -> Result<Expr> {
743        let mut exprs: Vec<_> = entries
744            .into_iter()
745            .flat_map(|entry| vec![entry.key, entry.value].into_iter())
746            .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context))
747            .collect::<Result<Vec<_>>>()?;
748        for planner in self.context_provider.get_expr_planners() {
749            match planner.plan_make_map(exprs)? {
750                PlannerResult::Planned(expr) => {
751                    return Ok(expr);
752                }
753                PlannerResult::Original(expr) => exprs = expr,
754            }
755        }
756        not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}")
757    }
758
759    // Handles a call to struct(...) where the arguments are named. For example
760    // `struct (v as foo, v2 as bar)` by creating a call to the `named_struct` function
761    fn create_named_struct_expr(
762        &self,
763        values: Vec<SQLExpr>,
764        input_schema: &DFSchema,
765        planner_context: &mut PlannerContext,
766    ) -> Result<Vec<Expr>> {
767        Ok(values
768            .into_iter()
769            .enumerate()
770            .map(|(i, value)| {
771                let args = if let SQLExpr::Named { expr, name } = value {
772                    [
773                        name.value.lit(),
774                        self.sql_expr_to_logical_expr(
775                            *expr,
776                            input_schema,
777                            planner_context,
778                        )?,
779                    ]
780                } else {
781                    [
782                        format!("c{i}").lit(),
783                        self.sql_expr_to_logical_expr(
784                            value,
785                            input_schema,
786                            planner_context,
787                        )?,
788                    ]
789                };
790
791                Ok(args)
792            })
793            .collect::<Result<Vec<_>>>()?
794            .into_iter()
795            .flatten()
796            .collect())
797    }
798
799    // Handles a call to struct(...) where the arguments are not named. For example
800    // `struct (v, v2)` by creating a call to the `struct` function
801    // which will create a struct with fields named `c0`, `c1`, etc.
802    fn create_struct_expr(
803        &self,
804        values: Vec<SQLExpr>,
805        input_schema: &DFSchema,
806        planner_context: &mut PlannerContext,
807    ) -> Result<Vec<Expr>> {
808        values
809            .into_iter()
810            .map(|value| {
811                self.sql_expr_to_logical_expr(value, input_schema, planner_context)
812            })
813            .collect::<Result<Vec<_>>>()
814    }
815
816    fn sql_in_list_to_expr(
817        &self,
818        expr: SQLExpr,
819        list: Vec<SQLExpr>,
820        negated: bool,
821        schema: &DFSchema,
822        planner_context: &mut PlannerContext,
823    ) -> Result<Expr> {
824        let list_expr = list
825            .into_iter()
826            .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
827            .collect::<Result<Vec<_>>>()?;
828
829        Ok(Expr::InList(InList::new(
830            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
831            list_expr,
832            negated,
833        )))
834    }
835
836    #[allow(clippy::too_many_arguments)]
837    fn sql_like_to_expr(
838        &self,
839        negated: bool,
840        expr: SQLExpr,
841        pattern: SQLExpr,
842        escape_char: Option<Value>,
843        schema: &DFSchema,
844        planner_context: &mut PlannerContext,
845        case_insensitive: bool,
846        any: bool,
847    ) -> Result<Expr> {
848        if any {
849            return not_impl_err!("ANY in LIKE expression");
850        }
851        let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
852        let escape_char = match escape_char {
853            Some(Value::SingleQuotedString(char)) if char.len() == 1 => {
854                Some(char.chars().next().unwrap())
855            }
856            Some(value) => return plan_err!("Invalid escape character in LIKE expression. Expected a single character wrapped with single quotes, got {value}"),
857            None => None,
858        };
859        Ok(Expr::Like(Like::new(
860            negated,
861            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
862            Box::new(pattern),
863            escape_char,
864            case_insensitive,
865        )))
866    }
867
868    fn sql_similarto_to_expr(
869        &self,
870        negated: bool,
871        expr: SQLExpr,
872        pattern: SQLExpr,
873        escape_char: Option<Value>,
874        schema: &DFSchema,
875        planner_context: &mut PlannerContext,
876    ) -> Result<Expr> {
877        let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
878        let pattern_type = pattern.get_type(schema)?;
879        if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
880            return plan_err!("Invalid pattern in SIMILAR TO expression");
881        }
882        let escape_char = match escape_char {
883            Some(Value::SingleQuotedString(char)) if char.len() == 1 => {
884                Some(char.chars().next().unwrap())
885            }
886            Some(value) => return plan_err!("Invalid escape character in SIMILAR TO expression. Expected a single character wrapped with single quotes, got {value}"),
887            None => None,
888        };
889        Ok(Expr::SimilarTo(Like::new(
890            negated,
891            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
892            Box::new(pattern),
893            escape_char,
894            false,
895        )))
896    }
897
898    fn sql_trim_to_expr(
899        &self,
900        expr: SQLExpr,
901        trim_where: Option<TrimWhereField>,
902        trim_what: Option<Box<SQLExpr>>,
903        trim_characters: Option<Vec<SQLExpr>>,
904        schema: &DFSchema,
905        planner_context: &mut PlannerContext,
906    ) -> Result<Expr> {
907        let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
908        let args = match (trim_what, trim_characters) {
909            (Some(to_trim), None) => {
910                let to_trim =
911                    self.sql_expr_to_logical_expr(*to_trim, schema, planner_context)?;
912                Ok(vec![arg, to_trim])
913            }
914            (None, Some(trim_characters)) => {
915                if let Some(first) = trim_characters.first() {
916                    let to_trim = self.sql_expr_to_logical_expr(
917                        first.clone(),
918                        schema,
919                        planner_context,
920                    )?;
921                    Ok(vec![arg, to_trim])
922                } else {
923                    plan_err!("TRIM CHARACTERS cannot be empty")
924                }
925            }
926            (Some(_), Some(_)) => {
927                plan_err!("Both TRIM and TRIM CHARACTERS cannot be specified")
928            }
929            (None, None) => Ok(vec![arg]),
930        }?;
931
932        let fun_name = match trim_where {
933            Some(TrimWhereField::Leading) => "ltrim",
934            Some(TrimWhereField::Trailing) => "rtrim",
935            Some(TrimWhereField::Both) => "btrim",
936            None => "trim",
937        };
938        let fun = self
939            .context_provider
940            .get_function_meta(fun_name)
941            .ok_or_else(|| {
942                internal_datafusion_err!("Unable to find expected '{fun_name}' function")
943            })?;
944
945        Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args)))
946    }
947
948    fn sql_overlay_to_expr(
949        &self,
950        expr: SQLExpr,
951        overlay_what: SQLExpr,
952        overlay_from: SQLExpr,
953        overlay_for: Option<Box<SQLExpr>>,
954        schema: &DFSchema,
955        planner_context: &mut PlannerContext,
956    ) -> Result<Expr> {
957        let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
958        let what_arg =
959            self.sql_expr_to_logical_expr(overlay_what, schema, planner_context)?;
960        let from_arg =
961            self.sql_expr_to_logical_expr(overlay_from, schema, planner_context)?;
962        let mut overlay_args = match overlay_for {
963            Some(for_expr) => {
964                let for_expr =
965                    self.sql_expr_to_logical_expr(*for_expr, schema, planner_context)?;
966                vec![arg, what_arg, from_arg, for_expr]
967            }
968            None => vec![arg, what_arg, from_arg],
969        };
970        for planner in self.context_provider.get_expr_planners() {
971            match planner.plan_overlay(overlay_args)? {
972                PlannerResult::Planned(expr) => return Ok(expr),
973                PlannerResult::Original(args) => overlay_args = args,
974            }
975        }
976        not_impl_err!("Overlay not supported by ExprPlanner: {overlay_args:?}")
977    }
978
979    fn sql_cast_to_expr(
980        &self,
981        expr: SQLExpr,
982        data_type: SQLDataType,
983        format: Option<CastFormat>,
984        schema: &DFSchema,
985        planner_context: &mut PlannerContext,
986    ) -> Result<Expr> {
987        if let Some(format) = format {
988            return not_impl_err!("CAST with format is not supported: {format}");
989        }
990
991        let dt = self.convert_data_type_to_field(&data_type)?;
992        let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
993
994        // numeric constants are treated as seconds (rather as nanoseconds)
995        // to align with postgres / duckdb semantics
996        let expr = match dt.data_type() {
997            DataType::Timestamp(TimeUnit::Nanosecond, tz)
998                if expr.get_type(schema)? == DataType::Int64 =>
999            {
1000                Expr::Cast(Cast::new(
1001                    Box::new(expr),
1002                    DataType::Timestamp(TimeUnit::Second, tz.clone()),
1003                ))
1004            }
1005            _ => expr,
1006        };
1007
1008        // Currently drops metadata attached to the type
1009        // https://github.com/apache/datafusion/issues/18060
1010        Ok(Expr::Cast(Cast::new(
1011            Box::new(expr),
1012            dt.data_type().clone(),
1013        )))
1014    }
1015
1016    /// Extracts the root expression and access chain from a compound expression.
1017    ///
1018    /// This function attempts to identify if a compound expression (like `a.b.c`) should be treated
1019    /// as a column reference with a qualifier (like `table.column`) or as a field access expression.
1020    ///
1021    /// # Arguments
1022    ///
1023    /// * `root` - The root SQL expression (e.g., the first part of `a.b.c`)
1024    /// * `access_chain` - Vector of access expressions (e.g., `.b` and `.c` parts)
1025    /// * `schema` - The schema to resolve column references against
1026    /// * `planner_context` - Context for planning expressions
1027    ///
1028    /// # Returns
1029    ///
1030    /// A tuple containing:
1031    /// * The resolved root expression
1032    /// * The remaining access chain that should be processed as field accesses
1033    fn extract_root_and_access_chain(
1034        &self,
1035        root: SQLExpr,
1036        mut access_chain: Vec<AccessExpr>,
1037        schema: &DFSchema,
1038        planner_context: &mut PlannerContext,
1039    ) -> Result<(Expr, Vec<AccessExpr>)> {
1040        let SQLExpr::Identifier(root_ident) = root else {
1041            let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1042            return Ok((root, access_chain));
1043        };
1044
1045        let mut compound_idents = vec![root_ident];
1046        let first_non_ident = access_chain
1047            .iter()
1048            .position(|access| !matches!(access, AccessExpr::Dot(SQLExpr::Identifier(_))))
1049            .unwrap_or(access_chain.len());
1050        for access in access_chain.drain(0..first_non_ident) {
1051            if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
1052                compound_idents.push(ident);
1053            } else {
1054                return internal_err!("Expected identifier in access chain");
1055            }
1056        }
1057
1058        let root = if compound_idents.len() == 1 {
1059            self.sql_identifier_to_expr(
1060                compound_idents.pop().unwrap(),
1061                schema,
1062                planner_context,
1063            )?
1064        } else {
1065            self.sql_compound_identifier_to_expr(
1066                compound_idents,
1067                schema,
1068                planner_context,
1069            )?
1070        };
1071        Ok((root, access_chain))
1072    }
1073
1074    fn sql_compound_field_access_to_expr(
1075        &self,
1076        root: SQLExpr,
1077        access_chain: Vec<AccessExpr>,
1078        schema: &DFSchema,
1079        planner_context: &mut PlannerContext,
1080    ) -> Result<Expr> {
1081        let (root, access_chain) = self.extract_root_and_access_chain(
1082            root,
1083            access_chain,
1084            schema,
1085            planner_context,
1086        )?;
1087        let fields = access_chain
1088            .into_iter()
1089            .map(|field| match field {
1090                AccessExpr::Subscript(subscript) => {
1091                    match subscript {
1092                        Subscript::Index { index } => {
1093                            // index can be a name, in which case it is a named field access
1094                            match index {
1095                                SQLExpr::Value(ValueWithSpan {
1096                                    value:
1097                                        Value::SingleQuotedString(s)
1098                                        | Value::DoubleQuotedString(s),
1099                                    span: _,
1100                                }) => Ok(Some(GetFieldAccess::NamedStructField {
1101                                    name: ScalarValue::from(s),
1102                                })),
1103                                SQLExpr::JsonAccess { .. } => {
1104                                    not_impl_err!("JsonAccess")
1105                                }
1106                                // otherwise treat like a list index
1107                                _ => Ok(Some(GetFieldAccess::ListIndex {
1108                                    key: Box::new(self.sql_expr_to_logical_expr(
1109                                        index,
1110                                        schema,
1111                                        planner_context,
1112                                    )?),
1113                                })),
1114                            }
1115                        }
1116                        Subscript::Slice {
1117                            lower_bound,
1118                            upper_bound,
1119                            stride,
1120                        } => {
1121                            // Means access like [:2]
1122                            let lower_bound = if let Some(lower_bound) = lower_bound {
1123                                self.sql_expr_to_logical_expr(
1124                                    lower_bound,
1125                                    schema,
1126                                    planner_context,
1127                                )
1128                            } else {
1129                                not_impl_err!("Slice subscript requires a lower bound")
1130                            }?;
1131
1132                            // means access like [2:]
1133                            let upper_bound = if let Some(upper_bound) = upper_bound {
1134                                self.sql_expr_to_logical_expr(
1135                                    upper_bound,
1136                                    schema,
1137                                    planner_context,
1138                                )
1139                            } else {
1140                                not_impl_err!("Slice subscript requires an upper bound")
1141                            }?;
1142
1143                            // stride, default to 1
1144                            let stride = if let Some(stride) = stride {
1145                                self.sql_expr_to_logical_expr(
1146                                    stride,
1147                                    schema,
1148                                    planner_context,
1149                                )?
1150                            } else {
1151                                lit(1i64)
1152                            };
1153
1154                            Ok(Some(GetFieldAccess::ListRange {
1155                                start: Box::new(lower_bound),
1156                                stop: Box::new(upper_bound),
1157                                stride: Box::new(stride),
1158                            }))
1159                        }
1160                    }
1161                }
1162                AccessExpr::Dot(expr) => match expr {
1163                    SQLExpr::Value(ValueWithSpan {
1164                        value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
1165                        span    : _
1166                    }) => Ok(Some(GetFieldAccess::NamedStructField {
1167                        name: ScalarValue::from(s),
1168                    })),
1169                    _ => {
1170                        not_impl_err!(
1171                            "Dot access not supported for non-string expr: {expr:?}"
1172                        )
1173                    }
1174                },
1175            })
1176            .collect::<Result<Vec<_>>>()?;
1177
1178        fields
1179            .into_iter()
1180            .flatten()
1181            .try_fold(root, |expr, field_access| {
1182                let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
1183                for planner in self.context_provider.get_expr_planners() {
1184                    match planner.plan_field_access(field_access_expr, schema)? {
1185                        PlannerResult::Planned(expr) => return Ok(expr),
1186                        PlannerResult::Original(expr) => {
1187                            field_access_expr = expr;
1188                        }
1189                    }
1190                }
1191                not_impl_err!(
1192                    "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
1193                )
1194            })
1195    }
1196}
1197
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use arrow::datatypes::{Field, Schema};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use datafusion_common::config::ConfigOptions;
    use datafusion_common::TableReference;
    use datafusion_expr::logical_plan::builder::LogicalTableSource;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};

    use super::*;

    /// Minimal [`ContextProvider`] for these tests: it knows one table
    /// (`table1`, with a single non-nullable `Utf8` column `column1`) and one
    /// aggregate function (`sum`).
    struct TestContextProvider {
        options: ConfigOptions,
        tables: HashMap<String, Arc<dyn TableSource>>,
    }

    impl TestContextProvider {
        pub fn new() -> Self {
            let column1 = Field::new("column1".to_string(), DataType::Utf8, false);
            let tables = HashMap::from([(
                "table1".to_string(),
                create_table_source(vec![column1]),
            )]);

            Self {
                options: ConfigOptions::default(),
                tables,
            }
        }
    }

    impl ContextProvider for TestContextProvider {
        fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
            if let Some(source) = self.tables.get(name.table()) {
                Ok(Arc::clone(source))
            } else {
                plan_err!("Table not found: {}", name.table())
            }
        }

        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
            if name == "sum" {
                Some(datafusion_functions_aggregate::sum::sum_udaf())
            } else {
                None
            }
        }

        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }

        fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
            None
        }

        fn udf_names(&self) -> Vec<String> {
            Vec::new()
        }

        fn udaf_names(&self) -> Vec<String> {
            vec![String::from("sum")]
        }

        fn udwf_names(&self) -> Vec<String> {
            Vec::new()
        }
    }

    /// Wraps `fields` in a schema with empty metadata and exposes it as a
    /// table source.
    fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
        let schema = Schema::new_with_metadata(fields, HashMap::new());
        Arc::new(LogicalTableSource::new(Arc::new(schema)))
    }

    /// Generates a test named `test_stack_overflow_<N>` that plans a chain of
    /// `N` equality predicates joined with `OR` and expects planning to
    /// succeed.
    macro_rules! test_stack_overflow {
        ($num_expr:expr) => {
            paste::item! {
                #[test]
                fn [<test_stack_overflow_ $num_expr>]() {
                    // `column1 = 'value0' OR column1 = 'value1' OR ...`
                    let predicates: Vec<String> = (0..$num_expr)
                        .map(|i| format!("column1 = 'value{:?}'", i))
                        .collect();
                    let expr_str = predicates.join(" OR ");

                    let dialect = GenericDialect {};
                    let sql_expr = Parser::new(&dialect)
                        .try_with_sql(expr_str.as_str())
                        .unwrap()
                        .parse_expr()
                        .unwrap();

                    let context_provider = TestContextProvider::new();
                    let sql_to_rel = SqlToRel::new(&context_provider);
                    let schema = DFSchema::empty();
                    let mut planner_context = PlannerContext::default();

                    // Should not stack overflow
                    let planned = sql_to_rel.sql_expr_to_logical_expr(
                        sql_expr,
                        &schema,
                        &mut planner_context,
                    );
                    planned.unwrap();
                }
            }
        };
    }

    test_stack_overflow!(64);
    test_stack_overflow!(128);
    test_stack_overflow!(256);
    test_stack_overflow!(512);
    test_stack_overflow!(1024);
    test_stack_overflow!(2048);
    test_stack_overflow!(4096);
    test_stack_overflow!(8192);

    /// An aliased aggregate call must be planned as an [`Expr::Alias`].
    #[test]
    fn test_sql_to_expr_with_alias() {
        let expr_str = "SUM(int_col) as sum_int_col";

        // `parse_expr_with_alias` comes from sqlparser
        let dialect = GenericDialect {};
        let sql_expr = Parser::new(&dialect)
            .try_with_sql(expr_str)
            .unwrap()
            .parse_expr_with_alias()
            .unwrap();

        let context_provider = TestContextProvider::new();
        let sql_to_rel = SqlToRel::new(&context_provider);
        let schema = DFSchema::empty();
        let mut planner_context = PlannerContext::default();

        let expr = sql_to_rel
            .sql_expr_to_logical_expr_with_alias(sql_expr, &schema, &mut planner_context)
            .unwrap();

        assert!(matches!(expr, Expr::Alias(_)));
    }
}