1use arrow::datatypes::{DataType, TimeUnit};
19use datafusion_expr::planner::{
20 PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
21};
22use sqlparser::ast::{
23 AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind,
24 DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr,
25 ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, TrimWhereField,
26 TypedString, Value, ValueWithSpan,
27};
28
29use datafusion_common::{
30 internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
31 ScalarValue,
32};
33
34use datafusion_expr::expr::ScalarFunction;
35use datafusion_expr::expr::{InList, WildcardOptions};
36use datafusion_expr::{
37 lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal,
38 Operator, TryCast,
39};
40
41use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
42
43mod binary_op;
44mod function;
45mod grouping_set;
46mod identifier;
47mod order_by;
48mod subquery;
49mod substring;
50mod unary_op;
51mod value;
52
53impl<S: ContextProvider> SqlToRel<'_, S> {
54 pub(crate) fn sql_expr_to_logical_expr_with_alias(
55 &self,
56 sql: SQLExprWithAlias,
57 schema: &DFSchema,
58 planner_context: &mut PlannerContext,
59 ) -> Result<Expr> {
60 let mut expr =
61 self.sql_expr_to_logical_expr(sql.expr, schema, planner_context)?;
62 if let Some(alias) = sql.alias {
63 expr = expr.alias(alias.value);
64 }
65 Ok(expr)
66 }
67 pub(crate) fn sql_expr_to_logical_expr(
68 &self,
69 sql: SQLExpr,
70 schema: &DFSchema,
71 planner_context: &mut PlannerContext,
72 ) -> Result<Expr> {
73 enum StackEntry {
74 SQLExpr(Box<SQLExpr>),
75 Operator(BinaryOperator),
76 }
77
78 let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
83 let mut eval_stack = vec![];
84
85 while let Some(entry) = stack.pop() {
86 match entry {
87 StackEntry::SQLExpr(sql_expr) => {
88 match *sql_expr {
89 SQLExpr::BinaryOp { left, op, right } => {
90 stack.push(StackEntry::Operator(op));
93 stack.push(StackEntry::SQLExpr(right));
94 stack.push(StackEntry::SQLExpr(left));
95 }
96 _ => {
97 let expr = self.sql_expr_to_logical_expr_internal(
98 *sql_expr,
99 schema,
100 planner_context,
101 )?;
102 eval_stack.push(expr);
103 }
104 }
105 }
106 StackEntry::Operator(op) => {
107 let right = eval_stack.pop().unwrap();
108 let left = eval_stack.pop().unwrap();
109 let expr = self.build_logical_expr(op, left, right, schema)?;
110 eval_stack.push(expr);
111 }
112 }
113 }
114
115 assert_eq!(1, eval_stack.len());
116 let expr = eval_stack.pop().unwrap();
117 Ok(expr)
118 }
119
120 fn build_logical_expr(
121 &self,
122 op: BinaryOperator,
123 left: Expr,
124 right: Expr,
125 schema: &DFSchema,
126 ) -> Result<Expr> {
127 let mut binary_expr = RawBinaryExpr { op, left, right };
129 for planner in self.context_provider.get_expr_planners() {
130 match planner.plan_binary_op(binary_expr, schema)? {
131 PlannerResult::Planned(expr) => {
132 return Ok(expr);
133 }
134 PlannerResult::Original(expr) => {
135 binary_expr = expr;
136 }
137 }
138 }
139
140 let RawBinaryExpr { op, left, right } = binary_expr;
141 Ok(Expr::BinaryExpr(BinaryExpr::new(
142 Box::new(left),
143 self.parse_sql_binary_op(op)?,
144 Box::new(right),
145 )))
146 }
147
148 pub fn sql_to_expr_with_alias(
149 &self,
150 sql: SQLExprWithAlias,
151 schema: &DFSchema,
152 planner_context: &mut PlannerContext,
153 ) -> Result<Expr> {
154 let mut expr =
155 self.sql_expr_to_logical_expr_with_alias(sql, schema, planner_context)?;
156 expr = self.rewrite_partial_qualifier(expr, schema);
157 self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
158 let (expr, _) = expr.infer_placeholder_types(schema)?;
159 Ok(expr)
160 }
161
162 pub fn sql_to_expr(
164 &self,
165 sql: SQLExpr,
166 schema: &DFSchema,
167 planner_context: &mut PlannerContext,
168 ) -> Result<Expr> {
169 let mut expr = self.sql_expr_to_logical_expr(sql, schema, planner_context)?;
171 expr = self.rewrite_partial_qualifier(expr, schema);
172 self.validate_schema_satisfies_exprs(schema, std::slice::from_ref(&expr))?;
173 let (expr, _) = expr.infer_placeholder_types(schema)?;
174 Ok(expr)
175 }
176
177 fn rewrite_partial_qualifier(&self, expr: Expr, schema: &DFSchema) -> Expr {
179 match expr {
180 Expr::Column(col) => match &col.relation {
181 Some(q) => {
182 match schema.iter().find(|(qualifier, field)| match qualifier {
183 Some(field_q) => {
184 field.name() == &col.name
185 && field_q.to_string().ends_with(&format!(".{q}"))
186 }
187 _ => false,
188 }) {
189 Some((qualifier, df_field)) => Expr::from((qualifier, df_field)),
190 None => Expr::Column(col),
191 }
192 }
193 None => Expr::Column(col),
194 },
195 _ => expr,
196 }
197 }
198
199 #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
202 fn sql_expr_to_logical_expr_internal(
203 &self,
204 sql: SQLExpr,
205 schema: &DFSchema,
206 planner_context: &mut PlannerContext,
207 ) -> Result<Expr> {
208 match sql {
214 SQLExpr::Value(value) => {
215 self.parse_value(value.into(), planner_context.prepare_param_data_types())
216 }
217 SQLExpr::Extract { field, expr, .. } => {
218 let mut extract_args = vec![
219 Expr::Literal(ScalarValue::from(format!("{field}")), None),
220 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
221 ];
222
223 for planner in self.context_provider.get_expr_planners() {
224 match planner.plan_extract(extract_args)? {
225 PlannerResult::Planned(expr) => return Ok(expr),
226 PlannerResult::Original(args) => {
227 extract_args = args;
228 }
229 }
230 }
231
232 not_impl_err!("Extract not supported by ExprPlanner: {extract_args:?}")
233 }
234
235 SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema),
236 SQLExpr::Interval(interval) => self.sql_interval_to_expr(false, interval),
237 SQLExpr::Identifier(id) => {
238 self.sql_identifier_to_expr(id, schema, planner_context)
239 }
240
241 SQLExpr::CompoundFieldAccess { root, access_chain } => self
243 .sql_compound_field_access_to_expr(
244 *root,
245 access_chain,
246 schema,
247 planner_context,
248 ),
249
250 SQLExpr::CompoundIdentifier(ids) => {
251 self.sql_compound_identifier_to_expr(ids, schema, planner_context)
252 }
253
254 SQLExpr::Case {
255 operand,
256 conditions,
257 else_result,
258 case_token: _,
259 end_token: _,
260 } => self.sql_case_identifier_to_expr(
261 operand,
262 conditions,
263 else_result,
264 schema,
265 planner_context,
266 ),
267
268 SQLExpr::Cast {
269 kind: CastKind::Cast | CastKind::DoubleColon,
270 expr,
271 data_type,
272 format,
273 } => self.sql_cast_to_expr(*expr, data_type, format, schema, planner_context),
274
275 SQLExpr::Cast {
276 kind: CastKind::TryCast | CastKind::SafeCast,
277 expr,
278 data_type,
279 format,
280 } => {
281 if let Some(format) = format {
282 return not_impl_err!("CAST with format is not supported: {format}");
283 }
284
285 Ok(Expr::TryCast(TryCast::new(
286 Box::new(self.sql_expr_to_logical_expr(
287 *expr,
288 schema,
289 planner_context,
290 )?),
291 self.convert_data_type_to_field(&data_type)?
292 .data_type()
293 .clone(),
294 )))
295 }
296
297 SQLExpr::TypedString(TypedString {
298 data_type,
299 value,
300 uses_odbc_syntax: _,
301 }) => Ok(Expr::Cast(Cast::new(
302 Box::new(lit(value.into_string().unwrap())),
303 self.convert_data_type_to_field(&data_type)?
304 .data_type()
305 .clone(),
306 ))),
307
308 SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
309 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
310 ))),
311
312 SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new(
313 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
314 ))),
315
316 SQLExpr::IsDistinctFrom(left, right) => {
317 Ok(Expr::BinaryExpr(BinaryExpr::new(
318 Box::new(self.sql_expr_to_logical_expr(
319 *left,
320 schema,
321 planner_context,
322 )?),
323 Operator::IsDistinctFrom,
324 Box::new(self.sql_expr_to_logical_expr(
325 *right,
326 schema,
327 planner_context,
328 )?),
329 )))
330 }
331
332 SQLExpr::IsNotDistinctFrom(left, right) => {
333 Ok(Expr::BinaryExpr(BinaryExpr::new(
334 Box::new(self.sql_expr_to_logical_expr(
335 *left,
336 schema,
337 planner_context,
338 )?),
339 Operator::IsNotDistinctFrom,
340 Box::new(self.sql_expr_to_logical_expr(
341 *right,
342 schema,
343 planner_context,
344 )?),
345 )))
346 }
347
348 SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new(
349 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
350 ))),
351
352 SQLExpr::IsFalse(expr) => Ok(Expr::IsFalse(Box::new(
353 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
354 ))),
355
356 SQLExpr::IsNotTrue(expr) => Ok(Expr::IsNotTrue(Box::new(
357 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
358 ))),
359
360 SQLExpr::IsNotFalse(expr) => Ok(Expr::IsNotFalse(Box::new(
361 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
362 ))),
363
364 SQLExpr::IsUnknown(expr) => Ok(Expr::IsUnknown(Box::new(
365 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
366 ))),
367
368 SQLExpr::IsNotUnknown(expr) => Ok(Expr::IsNotUnknown(Box::new(
369 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
370 ))),
371
372 SQLExpr::UnaryOp { op, expr } => {
373 self.parse_sql_unary_op(op, *expr, schema, planner_context)
374 }
375
376 SQLExpr::Between {
377 expr,
378 negated,
379 low,
380 high,
381 } => Ok(Expr::Between(Between::new(
382 Box::new(self.sql_expr_to_logical_expr(
383 *expr,
384 schema,
385 planner_context,
386 )?),
387 negated,
388 Box::new(self.sql_expr_to_logical_expr(*low, schema, planner_context)?),
389 Box::new(self.sql_expr_to_logical_expr(
390 *high,
391 schema,
392 planner_context,
393 )?),
394 ))),
395
396 SQLExpr::InList {
397 expr,
398 list,
399 negated,
400 } => self.sql_in_list_to_expr(*expr, list, negated, schema, planner_context),
401
402 SQLExpr::Like {
403 negated,
404 expr,
405 pattern,
406 escape_char,
407 any,
408 } => self.sql_like_to_expr(
409 negated,
410 *expr,
411 *pattern,
412 escape_char,
413 schema,
414 planner_context,
415 false,
416 any,
417 ),
418
419 SQLExpr::ILike {
420 negated,
421 expr,
422 pattern,
423 escape_char,
424 any,
425 } => self.sql_like_to_expr(
426 negated,
427 *expr,
428 *pattern,
429 escape_char,
430 schema,
431 planner_context,
432 true,
433 any,
434 ),
435
436 SQLExpr::SimilarTo {
437 negated,
438 expr,
439 pattern,
440 escape_char,
441 } => self.sql_similarto_to_expr(
442 negated,
443 *expr,
444 *pattern,
445 escape_char,
446 schema,
447 planner_context,
448 ),
449
450 SQLExpr::BinaryOp { .. } => {
451 internal_err!("binary_op should be handled by sql_expr_to_logical_expr.")
452 }
453
454 #[cfg(feature = "unicode_expressions")]
455 SQLExpr::Substring {
456 expr,
457 substring_from,
458 substring_for,
459 special: _,
460 shorthand: _,
461 } => self.sql_substring_to_expr(
462 expr,
463 substring_from,
464 substring_for,
465 schema,
466 planner_context,
467 ),
468
469 #[cfg(not(feature = "unicode_expressions"))]
470 SQLExpr::Substring { .. } => {
471 internal_err!(
472 "statement substring requires compilation with feature flag: unicode_expressions."
473 )
474 }
475
476 SQLExpr::Trim {
477 expr,
478 trim_where,
479 trim_what,
480 trim_characters,
481 } => self.sql_trim_to_expr(
482 *expr,
483 trim_where,
484 trim_what,
485 trim_characters,
486 schema,
487 planner_context,
488 ),
489
490 SQLExpr::Function(function) => {
491 self.sql_function_to_expr(function, schema, planner_context)
492 }
493
494 SQLExpr::Rollup(exprs) => {
495 self.sql_rollup_to_expr(exprs, schema, planner_context)
496 }
497 SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
498 SQLExpr::GroupingSets(exprs) => {
499 self.sql_grouping_sets_to_expr(exprs, schema, planner_context)
500 }
501
502 SQLExpr::Floor { expr, field } => match field {
503 CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) => {
504 self.sql_fn_name_to_expr(*expr, "floor", schema, planner_context)
505 }
506 CeilFloorKind::DateTimeField(_) => {
507 not_impl_err!("FLOOR with datetime is not supported")
508 }
509 CeilFloorKind::Scale(_) => {
510 not_impl_err!("FLOOR with scale is not supported")
511 }
512 },
513 SQLExpr::Ceil { expr, field } => match field {
514 CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) => {
515 self.sql_fn_name_to_expr(*expr, "ceil", schema, planner_context)
516 }
517 CeilFloorKind::DateTimeField(_) => {
518 not_impl_err!("CEIL with datetime is not supported")
519 }
520 CeilFloorKind::Scale(_) => {
521 not_impl_err!("CEIL with scale is not supported")
522 }
523 },
524 SQLExpr::Overlay {
525 expr,
526 overlay_what,
527 overlay_from,
528 overlay_for,
529 } => self.sql_overlay_to_expr(
530 *expr,
531 *overlay_what,
532 *overlay_from,
533 overlay_for,
534 schema,
535 planner_context,
536 ),
537 SQLExpr::Nested(e) => {
538 self.sql_expr_to_logical_expr(*e, schema, planner_context)
539 }
540
541 SQLExpr::Exists { subquery, negated } => {
542 self.parse_exists_subquery(*subquery, negated, schema, planner_context)
543 }
544 SQLExpr::InSubquery {
545 expr,
546 subquery,
547 negated,
548 } => {
549 self.parse_in_subquery(*expr, *subquery, negated, schema, planner_context)
550 }
551 SQLExpr::Subquery(subquery) => {
552 self.parse_scalar_subquery(*subquery, schema, planner_context)
553 }
554
555 SQLExpr::Struct { values, fields } => {
556 self.parse_struct(schema, planner_context, values, fields)
557 }
558 SQLExpr::Position { expr, r#in } => {
559 self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
560 }
561 SQLExpr::AtTimeZone {
562 timestamp,
563 time_zone,
564 } => Ok(Expr::Cast(Cast::new(
565 Box::new(self.sql_expr_to_logical_expr_internal(
566 *timestamp,
567 schema,
568 planner_context,
569 )?),
570 match *time_zone {
571 SQLExpr::Value(ValueWithSpan {
572 value: Value::SingleQuotedString(s),
573 span: _,
574 }) => DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())),
575 _ => {
576 return not_impl_err!(
577 "Unsupported ast node in sqltorel: {time_zone:?}"
578 )
579 }
580 },
581 ))),
582 SQLExpr::Dictionary(fields) => {
583 self.try_plan_dictionary_literal(fields, schema, planner_context)
584 }
585 SQLExpr::Map(map) => {
586 self.try_plan_map_literal(map.entries, schema, planner_context)
587 }
588 SQLExpr::AnyOp {
589 left,
590 compare_op,
591 right,
592 is_some: _,
595 } => {
596 let mut binary_expr = RawBinaryExpr {
597 op: compare_op,
598 left: self.sql_expr_to_logical_expr(
599 *left,
600 schema,
601 planner_context,
602 )?,
603 right: self.sql_expr_to_logical_expr(
604 *right,
605 schema,
606 planner_context,
607 )?,
608 };
609 for planner in self.context_provider.get_expr_planners() {
610 match planner.plan_any(binary_expr)? {
611 PlannerResult::Planned(expr) => {
612 return Ok(expr);
613 }
614 PlannerResult::Original(expr) => {
615 binary_expr = expr;
616 }
617 }
618 }
619 not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
620 }
621 #[expect(deprecated)]
622 SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
623 qualifier: None,
624 options: Box::new(WildcardOptions::default()),
625 }),
626 #[expect(deprecated)]
627 SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard {
628 qualifier: Some(self.object_name_to_table_reference(object_name)?),
629 options: Box::new(WildcardOptions::default()),
630 }),
631 SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
632 _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
633 }
634 }
635
636 fn parse_struct(
638 &self,
639 schema: &DFSchema,
640 planner_context: &mut PlannerContext,
641 values: Vec<SQLExpr>,
642 fields: Vec<StructField>,
643 ) -> Result<Expr> {
644 if !fields.is_empty() {
645 return not_impl_err!("Struct fields are not supported yet");
646 }
647 let is_named_struct = values
648 .iter()
649 .any(|value| matches!(value, SQLExpr::Named { .. }));
650
651 let mut create_struct_args = if is_named_struct {
652 self.create_named_struct_expr(values, schema, planner_context)?
653 } else {
654 self.create_struct_expr(values, schema, planner_context)?
655 };
656
657 for planner in self.context_provider.get_expr_planners() {
658 match planner.plan_struct_literal(create_struct_args, is_named_struct)? {
659 PlannerResult::Planned(expr) => return Ok(expr),
660 PlannerResult::Original(args) => create_struct_args = args,
661 }
662 }
663 not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}")
664 }
665
666 fn parse_tuple(
667 &self,
668 schema: &DFSchema,
669 planner_context: &mut PlannerContext,
670 values: Vec<SQLExpr>,
671 ) -> Result<Expr> {
672 match values.first() {
673 Some(SQLExpr::Identifier(_))
674 | Some(SQLExpr::Value(_))
675 | Some(SQLExpr::CompoundIdentifier(_)) => {
676 self.parse_struct(schema, planner_context, values, vec![])
677 }
678 None => not_impl_err!("Empty tuple not supported yet"),
679 _ => {
680 not_impl_err!("Only identifiers and literals are supported in tuples")
681 }
682 }
683 }
684
685 fn sql_position_to_expr(
686 &self,
687 substr_expr: SQLExpr,
688 str_expr: SQLExpr,
689 schema: &DFSchema,
690 planner_context: &mut PlannerContext,
691 ) -> Result<Expr> {
692 let substr =
693 self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?;
694 let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?;
695 let mut position_args = vec![fullstr, substr];
696 for planner in self.context_provider.get_expr_planners() {
697 match planner.plan_position(position_args)? {
698 PlannerResult::Planned(expr) => return Ok(expr),
699 PlannerResult::Original(args) => {
700 position_args = args;
701 }
702 }
703 }
704
705 not_impl_err!("Position not supported by ExprPlanner: {position_args:?}")
706 }
707
708 fn try_plan_dictionary_literal(
709 &self,
710 fields: Vec<DictionaryField>,
711 schema: &DFSchema,
712 planner_context: &mut PlannerContext,
713 ) -> Result<Expr> {
714 let mut keys = vec![];
715 let mut values = vec![];
716 for field in fields {
717 let key = lit(field.key.value);
718 let value =
719 self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?;
720 keys.push(key);
721 values.push(value);
722 }
723
724 let mut raw_expr = RawDictionaryExpr { keys, values };
725
726 for planner in self.context_provider.get_expr_planners() {
727 match planner.plan_dictionary_literal(raw_expr, schema)? {
728 PlannerResult::Planned(expr) => {
729 return Ok(expr);
730 }
731 PlannerResult::Original(expr) => raw_expr = expr,
732 }
733 }
734 not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}")
735 }
736
737 fn try_plan_map_literal(
738 &self,
739 entries: Vec<MapEntry>,
740 schema: &DFSchema,
741 planner_context: &mut PlannerContext,
742 ) -> Result<Expr> {
743 let mut exprs: Vec<_> = entries
744 .into_iter()
745 .flat_map(|entry| vec![entry.key, entry.value].into_iter())
746 .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context))
747 .collect::<Result<Vec<_>>>()?;
748 for planner in self.context_provider.get_expr_planners() {
749 match planner.plan_make_map(exprs)? {
750 PlannerResult::Planned(expr) => {
751 return Ok(expr);
752 }
753 PlannerResult::Original(expr) => exprs = expr,
754 }
755 }
756 not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}")
757 }
758
759 fn create_named_struct_expr(
762 &self,
763 values: Vec<SQLExpr>,
764 input_schema: &DFSchema,
765 planner_context: &mut PlannerContext,
766 ) -> Result<Vec<Expr>> {
767 Ok(values
768 .into_iter()
769 .enumerate()
770 .map(|(i, value)| {
771 let args = if let SQLExpr::Named { expr, name } = value {
772 [
773 name.value.lit(),
774 self.sql_expr_to_logical_expr(
775 *expr,
776 input_schema,
777 planner_context,
778 )?,
779 ]
780 } else {
781 [
782 format!("c{i}").lit(),
783 self.sql_expr_to_logical_expr(
784 value,
785 input_schema,
786 planner_context,
787 )?,
788 ]
789 };
790
791 Ok(args)
792 })
793 .collect::<Result<Vec<_>>>()?
794 .into_iter()
795 .flatten()
796 .collect())
797 }
798
799 fn create_struct_expr(
803 &self,
804 values: Vec<SQLExpr>,
805 input_schema: &DFSchema,
806 planner_context: &mut PlannerContext,
807 ) -> Result<Vec<Expr>> {
808 values
809 .into_iter()
810 .map(|value| {
811 self.sql_expr_to_logical_expr(value, input_schema, planner_context)
812 })
813 .collect::<Result<Vec<_>>>()
814 }
815
816 fn sql_in_list_to_expr(
817 &self,
818 expr: SQLExpr,
819 list: Vec<SQLExpr>,
820 negated: bool,
821 schema: &DFSchema,
822 planner_context: &mut PlannerContext,
823 ) -> Result<Expr> {
824 let list_expr = list
825 .into_iter()
826 .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
827 .collect::<Result<Vec<_>>>()?;
828
829 Ok(Expr::InList(InList::new(
830 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
831 list_expr,
832 negated,
833 )))
834 }
835
836 #[allow(clippy::too_many_arguments)]
837 fn sql_like_to_expr(
838 &self,
839 negated: bool,
840 expr: SQLExpr,
841 pattern: SQLExpr,
842 escape_char: Option<Value>,
843 schema: &DFSchema,
844 planner_context: &mut PlannerContext,
845 case_insensitive: bool,
846 any: bool,
847 ) -> Result<Expr> {
848 if any {
849 return not_impl_err!("ANY in LIKE expression");
850 }
851 let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
852 let escape_char = match escape_char {
853 Some(Value::SingleQuotedString(char)) if char.len() == 1 => {
854 Some(char.chars().next().unwrap())
855 }
856 Some(value) => return plan_err!("Invalid escape character in LIKE expression. Expected a single character wrapped with single quotes, got {value}"),
857 None => None,
858 };
859 Ok(Expr::Like(Like::new(
860 negated,
861 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
862 Box::new(pattern),
863 escape_char,
864 case_insensitive,
865 )))
866 }
867
868 fn sql_similarto_to_expr(
869 &self,
870 negated: bool,
871 expr: SQLExpr,
872 pattern: SQLExpr,
873 escape_char: Option<Value>,
874 schema: &DFSchema,
875 planner_context: &mut PlannerContext,
876 ) -> Result<Expr> {
877 let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
878 let pattern_type = pattern.get_type(schema)?;
879 if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
880 return plan_err!("Invalid pattern in SIMILAR TO expression");
881 }
882 let escape_char = match escape_char {
883 Some(Value::SingleQuotedString(char)) if char.len() == 1 => {
884 Some(char.chars().next().unwrap())
885 }
886 Some(value) => return plan_err!("Invalid escape character in SIMILAR TO expression. Expected a single character wrapped with single quotes, got {value}"),
887 None => None,
888 };
889 Ok(Expr::SimilarTo(Like::new(
890 negated,
891 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
892 Box::new(pattern),
893 escape_char,
894 false,
895 )))
896 }
897
898 fn sql_trim_to_expr(
899 &self,
900 expr: SQLExpr,
901 trim_where: Option<TrimWhereField>,
902 trim_what: Option<Box<SQLExpr>>,
903 trim_characters: Option<Vec<SQLExpr>>,
904 schema: &DFSchema,
905 planner_context: &mut PlannerContext,
906 ) -> Result<Expr> {
907 let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
908 let args = match (trim_what, trim_characters) {
909 (Some(to_trim), None) => {
910 let to_trim =
911 self.sql_expr_to_logical_expr(*to_trim, schema, planner_context)?;
912 Ok(vec![arg, to_trim])
913 }
914 (None, Some(trim_characters)) => {
915 if let Some(first) = trim_characters.first() {
916 let to_trim = self.sql_expr_to_logical_expr(
917 first.clone(),
918 schema,
919 planner_context,
920 )?;
921 Ok(vec![arg, to_trim])
922 } else {
923 plan_err!("TRIM CHARACTERS cannot be empty")
924 }
925 }
926 (Some(_), Some(_)) => {
927 plan_err!("Both TRIM and TRIM CHARACTERS cannot be specified")
928 }
929 (None, None) => Ok(vec![arg]),
930 }?;
931
932 let fun_name = match trim_where {
933 Some(TrimWhereField::Leading) => "ltrim",
934 Some(TrimWhereField::Trailing) => "rtrim",
935 Some(TrimWhereField::Both) => "btrim",
936 None => "trim",
937 };
938 let fun = self
939 .context_provider
940 .get_function_meta(fun_name)
941 .ok_or_else(|| {
942 internal_datafusion_err!("Unable to find expected '{fun_name}' function")
943 })?;
944
945 Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args)))
946 }
947
948 fn sql_overlay_to_expr(
949 &self,
950 expr: SQLExpr,
951 overlay_what: SQLExpr,
952 overlay_from: SQLExpr,
953 overlay_for: Option<Box<SQLExpr>>,
954 schema: &DFSchema,
955 planner_context: &mut PlannerContext,
956 ) -> Result<Expr> {
957 let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
958 let what_arg =
959 self.sql_expr_to_logical_expr(overlay_what, schema, planner_context)?;
960 let from_arg =
961 self.sql_expr_to_logical_expr(overlay_from, schema, planner_context)?;
962 let mut overlay_args = match overlay_for {
963 Some(for_expr) => {
964 let for_expr =
965 self.sql_expr_to_logical_expr(*for_expr, schema, planner_context)?;
966 vec![arg, what_arg, from_arg, for_expr]
967 }
968 None => vec![arg, what_arg, from_arg],
969 };
970 for planner in self.context_provider.get_expr_planners() {
971 match planner.plan_overlay(overlay_args)? {
972 PlannerResult::Planned(expr) => return Ok(expr),
973 PlannerResult::Original(args) => overlay_args = args,
974 }
975 }
976 not_impl_err!("Overlay not supported by ExprPlanner: {overlay_args:?}")
977 }
978
979 fn sql_cast_to_expr(
980 &self,
981 expr: SQLExpr,
982 data_type: SQLDataType,
983 format: Option<CastFormat>,
984 schema: &DFSchema,
985 planner_context: &mut PlannerContext,
986 ) -> Result<Expr> {
987 if let Some(format) = format {
988 return not_impl_err!("CAST with format is not supported: {format}");
989 }
990
991 let dt = self.convert_data_type_to_field(&data_type)?;
992 let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
993
994 let expr = match dt.data_type() {
997 DataType::Timestamp(TimeUnit::Nanosecond, tz)
998 if expr.get_type(schema)? == DataType::Int64 =>
999 {
1000 Expr::Cast(Cast::new(
1001 Box::new(expr),
1002 DataType::Timestamp(TimeUnit::Second, tz.clone()),
1003 ))
1004 }
1005 _ => expr,
1006 };
1007
1008 Ok(Expr::Cast(Cast::new(
1011 Box::new(expr),
1012 dt.data_type().clone(),
1013 )))
1014 }
1015
1016 fn extract_root_and_access_chain(
1034 &self,
1035 root: SQLExpr,
1036 mut access_chain: Vec<AccessExpr>,
1037 schema: &DFSchema,
1038 planner_context: &mut PlannerContext,
1039 ) -> Result<(Expr, Vec<AccessExpr>)> {
1040 let SQLExpr::Identifier(root_ident) = root else {
1041 let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1042 return Ok((root, access_chain));
1043 };
1044
1045 let mut compound_idents = vec![root_ident];
1046 let first_non_ident = access_chain
1047 .iter()
1048 .position(|access| !matches!(access, AccessExpr::Dot(SQLExpr::Identifier(_))))
1049 .unwrap_or(access_chain.len());
1050 for access in access_chain.drain(0..first_non_ident) {
1051 if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
1052 compound_idents.push(ident);
1053 } else {
1054 return internal_err!("Expected identifier in access chain");
1055 }
1056 }
1057
1058 let root = if compound_idents.len() == 1 {
1059 self.sql_identifier_to_expr(
1060 compound_idents.pop().unwrap(),
1061 schema,
1062 planner_context,
1063 )?
1064 } else {
1065 self.sql_compound_identifier_to_expr(
1066 compound_idents,
1067 schema,
1068 planner_context,
1069 )?
1070 };
1071 Ok((root, access_chain))
1072 }
1073
1074 fn sql_compound_field_access_to_expr(
1075 &self,
1076 root: SQLExpr,
1077 access_chain: Vec<AccessExpr>,
1078 schema: &DFSchema,
1079 planner_context: &mut PlannerContext,
1080 ) -> Result<Expr> {
1081 let (root, access_chain) = self.extract_root_and_access_chain(
1082 root,
1083 access_chain,
1084 schema,
1085 planner_context,
1086 )?;
1087 let fields = access_chain
1088 .into_iter()
1089 .map(|field| match field {
1090 AccessExpr::Subscript(subscript) => {
1091 match subscript {
1092 Subscript::Index { index } => {
1093 match index {
1095 SQLExpr::Value(ValueWithSpan {
1096 value:
1097 Value::SingleQuotedString(s)
1098 | Value::DoubleQuotedString(s),
1099 span: _,
1100 }) => Ok(Some(GetFieldAccess::NamedStructField {
1101 name: ScalarValue::from(s),
1102 })),
1103 SQLExpr::JsonAccess { .. } => {
1104 not_impl_err!("JsonAccess")
1105 }
1106 _ => Ok(Some(GetFieldAccess::ListIndex {
1108 key: Box::new(self.sql_expr_to_logical_expr(
1109 index,
1110 schema,
1111 planner_context,
1112 )?),
1113 })),
1114 }
1115 }
1116 Subscript::Slice {
1117 lower_bound,
1118 upper_bound,
1119 stride,
1120 } => {
1121 let lower_bound = if let Some(lower_bound) = lower_bound {
1123 self.sql_expr_to_logical_expr(
1124 lower_bound,
1125 schema,
1126 planner_context,
1127 )
1128 } else {
1129 not_impl_err!("Slice subscript requires a lower bound")
1130 }?;
1131
1132 let upper_bound = if let Some(upper_bound) = upper_bound {
1134 self.sql_expr_to_logical_expr(
1135 upper_bound,
1136 schema,
1137 planner_context,
1138 )
1139 } else {
1140 not_impl_err!("Slice subscript requires an upper bound")
1141 }?;
1142
1143 let stride = if let Some(stride) = stride {
1145 self.sql_expr_to_logical_expr(
1146 stride,
1147 schema,
1148 planner_context,
1149 )?
1150 } else {
1151 lit(1i64)
1152 };
1153
1154 Ok(Some(GetFieldAccess::ListRange {
1155 start: Box::new(lower_bound),
1156 stop: Box::new(upper_bound),
1157 stride: Box::new(stride),
1158 }))
1159 }
1160 }
1161 }
1162 AccessExpr::Dot(expr) => match expr {
1163 SQLExpr::Value(ValueWithSpan {
1164 value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
1165 span : _
1166 }) => Ok(Some(GetFieldAccess::NamedStructField {
1167 name: ScalarValue::from(s),
1168 })),
1169 _ => {
1170 not_impl_err!(
1171 "Dot access not supported for non-string expr: {expr:?}"
1172 )
1173 }
1174 },
1175 })
1176 .collect::<Result<Vec<_>>>()?;
1177
1178 fields
1179 .into_iter()
1180 .flatten()
1181 .try_fold(root, |expr, field_access| {
1182 let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
1183 for planner in self.context_provider.get_expr_planners() {
1184 match planner.plan_field_access(field_access_expr, schema)? {
1185 PlannerResult::Planned(expr) => return Ok(expr),
1186 PlannerResult::Original(expr) => {
1187 field_access_expr = expr;
1188 }
1189 }
1190 }
1191 not_impl_err!(
1192 "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
1193 )
1194 })
1195 }
1196}
1197
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use arrow::datatypes::{Field, Schema};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use datafusion_common::config::ConfigOptions;
    use datafusion_common::TableReference;
    use datafusion_expr::logical_plan::builder::LogicalTableSource;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};

    use super::*;

    /// Minimal [`ContextProvider`] for these tests: one table (`table1` with a
    /// non-nullable `Utf8` column `column1`) and one aggregate function (`sum`).
    struct TestContextProvider {
        options: ConfigOptions,
        tables: HashMap<String, Arc<dyn TableSource>>,
    }

    impl TestContextProvider {
        pub fn new() -> Self {
            let column = Field::new("column1".to_string(), DataType::Utf8, false);
            let tables =
                HashMap::from([("table1".to_string(), create_table_source(vec![column]))]);

            Self {
                options: ConfigOptions::default(),
                tables,
            }
        }
    }

    impl ContextProvider for TestContextProvider {
        fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
            if let Some(table) = self.tables.get(name.table()) {
                Ok(Arc::clone(table))
            } else {
                plan_err!("Table not found: {}", name.table())
            }
        }

        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
            if name == "sum" {
                Some(datafusion_functions_aggregate::sum::sum_udaf())
            } else {
                None
            }
        }

        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }

        fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
            None
        }

        fn udf_names(&self) -> Vec<String> {
            vec![]
        }

        fn udaf_names(&self) -> Vec<String> {
            vec!["sum".to_string()]
        }

        fn udwf_names(&self) -> Vec<String> {
            vec![]
        }
    }

    /// Wraps `fields` in a schema without metadata and exposes it as a table source.
    fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
        let schema = Schema::new_with_metadata(fields, HashMap::new());
        Arc::new(LogicalTableSource::new(Arc::new(schema)))
    }

    // Generates a test named `test_stack_overflow_<N>` that plans `N` equality
    // predicates joined with `OR`, checking that planning a long chain of
    // binary operators succeeds.
    macro_rules! test_stack_overflow {
        ($num_expr:expr) => {
            paste::item! {
                #[test]
                fn [<test_stack_overflow_ $num_expr>]() {
                    let schema = DFSchema::empty();
                    let mut planner_context = PlannerContext::default();

                    let predicates = (0..$num_expr)
                        .map(|i| format!("column1 = 'value{:?}'", i))
                        .collect::<Vec<String>>();
                    let expr_str = predicates.join(" OR ");

                    let dialect = GenericDialect{};
                    let sql_expr = Parser::new(&dialect)
                        .try_with_sql(expr_str.as_str())
                        .unwrap()
                        .parse_expr()
                        .unwrap();

                    let context_provider = TestContextProvider::new();
                    let sql_to_rel = SqlToRel::new(&context_provider);

                    sql_to_rel
                        .sql_expr_to_logical_expr(sql_expr, &schema, &mut planner_context)
                        .unwrap();
                }
            }
        };
    }

    test_stack_overflow!(64);
    test_stack_overflow!(128);
    test_stack_overflow!(256);
    test_stack_overflow!(512);
    test_stack_overflow!(1024);
    test_stack_overflow!(2048);
    test_stack_overflow!(4096);
    test_stack_overflow!(8192);

    /// An aliased SQL expression must be planned into an `Expr::Alias`.
    #[test]
    fn test_sql_to_expr_with_alias() {
        let schema = DFSchema::empty();
        let mut planner_context = PlannerContext::default();

        let dialect = GenericDialect {};
        let sql_expr = Parser::new(&dialect)
            .try_with_sql("SUM(int_col) as sum_int_col")
            .unwrap()
            .parse_expr_with_alias()
            .unwrap();

        let context_provider = TestContextProvider::new();
        let sql_to_rel = SqlToRel::new(&context_provider);

        let planned = sql_to_rel
            .sql_expr_to_logical_expr_with_alias(sql_expr, &schema, &mut planner_context)
            .unwrap();

        assert!(matches!(planned, Expr::Alias(_)));
    }
}