1use std::collections::HashSet;
21use std::ops::Not;
22use std::{borrow::Cow, sync::Arc};
23
24use arrow::{
25 array::{new_null_array, AsArray},
26 datatypes::{DataType, Field, Schema},
27 record_batch::RecordBatch,
28};
29
30use datafusion_common::tree_node::TreeNodeContainer;
31use datafusion_common::{
32 cast::{as_large_list_array, as_list_array},
33 metadata::FieldMetadata,
34 tree_node::{
35 Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
36 },
37};
38use datafusion_common::{
39 exec_datafusion_err, internal_err, DFSchema, DataFusionError, Result, ScalarValue,
40};
41use datafusion_expr::{
42 and, binary::BinaryTypeCoercer, lit, or, simplify::SimplifyContext, BinaryExpr, Case,
43 ColumnarValue, Expr, Like, Operator, Volatility,
44};
45use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
46use datafusion_expr::{
47 expr::{InList, InSubquery},
48 utils::{iter_conjunction, iter_conjunction_owned},
49};
50use datafusion_expr::{simplify::ExprSimplifyResult, Cast, TryCast};
51use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
52
53use super::inlist_simplifier::ShortenInListSimplifier;
54use super::utils::*;
55use crate::analyzer::type_coercion::TypeCoercionRewriter;
56use crate::simplify_expressions::guarantees::GuaranteeRewriter;
57use crate::simplify_expressions::regex::simplify_regex_expr;
58use crate::simplify_expressions::unwrap_cast::{
59 is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
60 is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
61 unwrap_cast_in_comparison_for_binary,
62};
63use crate::simplify_expressions::SimplifyInfo;
64use datafusion_expr_common::casts::try_cast_literal_to_type;
65use indexmap::IndexSet;
66use regex::Regex;
67
/// Simplifies [`Expr`]s by repeatedly applying constant evaluation,
/// algebraic/boolean rewrites and guarantee-based rewrites.
///
/// Build one with [`ExprSimplifier::new`] and tune it with
/// [`ExprSimplifier::with_guarantees`], [`ExprSimplifier::with_canonicalize`]
/// and [`ExprSimplifier::with_max_cycles`].
pub struct ExprSimplifier<S> {
    /// Context handed to the internal rewriters; also supplies the
    /// execution properties used for constant evaluation.
    info: S,
    /// `(expression, interval)` pairs passed to the guarantee rewriter on
    /// every simplification run. Empty by default.
    guarantees: Vec<(Expr, NullableInterval)>,
    /// When `true` (the default) the expression is run through the
    /// canonicalizer once before the simplification loop starts.
    canonicalize: bool,
    /// Upper bound on the number of simplification cycles. The loop always
    /// runs at least once, even when this is `0`.
    max_simplifier_cycles: u32,
}
112
/// Size threshold for inlining `IN` lists.
// NOTE(review): not referenced in this part of the file; presumably the
// maximum list length the in-list shortening pass will expand — confirm
// against `inlist_simplifier`.
pub const THRESHOLD_INLINE_INLIST: usize = 3;
/// Cycle limit that [`ExprSimplifier::new`] installs by default.
pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
115
116impl<S: SimplifyInfo> ExprSimplifier<S> {
117 pub fn new(info: S) -> Self {
123 Self {
124 info,
125 guarantees: vec![],
126 canonicalize: true,
127 max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
128 }
129 }
130
131 pub fn simplify(&self, expr: Expr) -> Result<Expr> {
193 Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
194 }
195
196 #[deprecated(
203 since = "48.0.0",
204 note = "Use `simplify_with_cycle_count_transformed` instead"
205 )]
206 #[allow(unused_mut)]
207 pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
208 let (transformed, cycle_count) =
209 self.simplify_with_cycle_count_transformed(expr)?;
210 Ok((transformed.data, cycle_count))
211 }
212
213 pub fn simplify_with_cycle_count_transformed(
226 &self,
227 mut expr: Expr,
228 ) -> Result<(Transformed<Expr>, u32)> {
229 let mut simplifier = Simplifier::new(&self.info);
230 let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?;
231 let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
232 let mut guarantee_rewriter = GuaranteeRewriter::new(&self.guarantees);
233
234 if self.canonicalize {
235 expr = expr.rewrite(&mut Canonicalizer::new()).data()?
236 }
237
238 let mut num_cycles = 0;
242 let mut has_transformed = false;
243 loop {
244 let Transformed {
245 data, transformed, ..
246 } = expr
247 .rewrite(&mut const_evaluator)?
248 .transform_data(|expr| expr.rewrite(&mut simplifier))?
249 .transform_data(|expr| expr.rewrite(&mut guarantee_rewriter))?;
250 expr = data;
251 num_cycles += 1;
252 has_transformed = has_transformed || transformed;
254 if !transformed || num_cycles >= self.max_simplifier_cycles {
255 break;
256 }
257 }
258 expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
260 Ok((
261 Transformed::new_transformed(expr, has_transformed),
262 num_cycles,
263 ))
264 }
265
266 pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
272 let mut expr_rewrite = TypeCoercionRewriter { schema };
273 expr.rewrite_with_schema(schema, &mut expr_rewrite).data()
274 }
275
276 pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
334 self.guarantees = guarantees;
335 self
336 }
337
338 pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
388 self.canonicalize = canonicalize;
389 self
390 }
391
392 pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
444 self.max_simplifier_cycles = max_simplifier_cycles;
445 self
446 }
447}
448
/// Stateless rewriter that normalizes the operand order of binary
/// expressions (see its `TreeNodeRewriter` implementation).
struct Canonicalizer {}

impl Canonicalizer {
    /// Builds a canonicalizer; there is no state to initialize.
    fn new() -> Self {
        Canonicalizer {}
    }
}
462
463impl TreeNodeRewriter for Canonicalizer {
464 type Node = Expr;
465
466 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
467 let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
468 return Ok(Transformed::no(expr));
469 };
470 match (left.as_ref(), right.as_ref(), op.swap()) {
471 (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
473 if right_col > left_col =>
474 {
475 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
476 left: right,
477 op: swapped_op,
478 right: left,
479 })))
480 }
481 (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
483 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
484 left: right,
485 op: swapped_op,
486 right: left,
487 })))
488 }
489 _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
490 left,
491 op,
492 right,
493 }))),
494 }
495 }
496}
497
/// Tree rewriter that replaces sub-expressions which can be evaluated
/// without any input data by the literal they evaluate to.
#[allow(rustdoc::private_intra_doc_links)]
struct ConstEvaluator<'a> {
    /// Stack with one flag per node on the current root-to-node path.
    /// `f_down` pushes `true` for each node and clears the flags of the
    /// node and its ancestors when the node cannot be evaluated; `f_up` pops
    /// the flag to decide whether to evaluate the node.
    can_evaluate: Vec<bool>,

    /// Execution properties forwarded to physical expression creation.
    execution_props: &'a ExecutionProps,
    /// Schema (a single nullable `Null` column) used to plan expressions.
    input_schema: DFSchema,
    /// One-row batch over that schema against which expressions are evaluated.
    input_batch: RecordBatch,
}
522
/// Outcome of `ConstEvaluator::evaluate_to_scalar`.
#[allow(dead_code)]
enum ConstSimplifyResult {
    /// The expression was evaluated to this scalar (with optional field metadata).
    Simplified(ScalarValue, Option<FieldMetadata>),
    /// The expression was already a literal; its parts are handed back unchanged.
    NotSimplified(ScalarValue, Option<FieldMetadata>),
    /// Planning or evaluation failed; carries the error and the original expression.
    SimplifyRuntimeError(DataFusionError, Expr),
}
533
534impl TreeNodeRewriter for ConstEvaluator<'_> {
535 type Node = Expr;
536
537 fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
538 self.can_evaluate.push(true);
540
541 if !Self::can_evaluate(&expr) {
546 let parent_iter = self.can_evaluate.iter_mut().rev();
548 for p in parent_iter {
549 if !*p {
550 break;
553 }
554 *p = false;
555 }
556 }
557
558 Ok(Transformed::no(expr))
562 }
563
564 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
565 match self.can_evaluate.pop() {
566 Some(true) => match self.evaluate_to_scalar(expr) {
571 ConstSimplifyResult::Simplified(s, m) => {
572 Ok(Transformed::yes(Expr::Literal(s, m)))
573 }
574 ConstSimplifyResult::NotSimplified(s, m) => {
575 Ok(Transformed::no(Expr::Literal(s, m)))
576 }
577 ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
578 if let Expr::Cast(Cast { ref expr, .. })
581 | Expr::TryCast(TryCast { ref expr, .. }) = expr
582 {
583 if matches!(expr.as_ref(), Expr::Literal(_, _)) {
584 return Err(err);
585 }
586 }
587 Ok(Transformed::yes(expr))
590 }
591 },
592 Some(false) => Ok(Transformed::no(expr)),
593 _ => internal_err!("Failed to pop can_evaluate"),
594 }
595 }
596}
597
598impl<'a> ConstEvaluator<'a> {
599 pub fn try_new(execution_props: &'a ExecutionProps) -> Result<Self> {
603 static DUMMY_COL_NAME: &str = ".";
606 let schema = Arc::new(Schema::new(vec![Field::new(
607 DUMMY_COL_NAME,
608 DataType::Null,
609 true,
610 )]));
611 let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
612 let col = new_null_array(&DataType::Null, 1);
614 let input_batch = RecordBatch::try_new(schema, vec![col])?;
615
616 Ok(Self {
617 can_evaluate: vec![],
618 execution_props,
619 input_schema,
620 input_batch,
621 })
622 }
623
624 fn volatility_ok(volatility: Volatility) -> bool {
626 match volatility {
627 Volatility::Immutable => true,
628 Volatility::Stable => true,
630 Volatility::Volatile => false,
631 }
632 }
633
634 fn can_evaluate(expr: &Expr) -> bool {
637 match expr {
643 #[expect(deprecated)]
645 Expr::AggregateFunction { .. }
646 | Expr::ScalarVariable(_, _)
647 | Expr::Column(_)
648 | Expr::OuterReferenceColumn(_, _)
649 | Expr::Exists { .. }
650 | Expr::InSubquery(_)
651 | Expr::ScalarSubquery(_)
652 | Expr::WindowFunction { .. }
653 | Expr::GroupingSet(_)
654 | Expr::Wildcard { .. }
655 | Expr::Placeholder(_)
656 | Expr::Lambda { .. } => false,
657 Expr::ScalarFunction(ScalarFunction { func, .. }) => {
658 Self::volatility_ok(func.signature().volatility)
659 }
660 Expr::Literal(_, _)
661 | Expr::Alias(..)
662 | Expr::Unnest(_)
663 | Expr::BinaryExpr { .. }
664 | Expr::Not(_)
665 | Expr::IsNotNull(_)
666 | Expr::IsNull(_)
667 | Expr::IsTrue(_)
668 | Expr::IsFalse(_)
669 | Expr::IsUnknown(_)
670 | Expr::IsNotTrue(_)
671 | Expr::IsNotFalse(_)
672 | Expr::IsNotUnknown(_)
673 | Expr::Negative(_)
674 | Expr::Between { .. }
675 | Expr::Like { .. }
676 | Expr::SimilarTo { .. }
677 | Expr::Case(_)
678 | Expr::Cast { .. }
679 | Expr::TryCast { .. }
680 | Expr::InList { .. } => true,
681 }
682 }
683
684 pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
686 if let Expr::Literal(s, m) = expr {
687 return ConstSimplifyResult::NotSimplified(s, m);
688 }
689
690 let phys_expr =
691 match create_physical_expr(&expr, &self.input_schema, self.execution_props) {
692 Ok(e) => e,
693 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
694 };
695 let metadata = phys_expr
696 .return_field(self.input_batch.schema_ref())
697 .ok()
698 .and_then(|f| {
699 let m = f.metadata();
700 match m.is_empty() {
701 true => None,
702 false => Some(FieldMetadata::from(m)),
703 }
704 });
705 let col_val = match phys_expr.evaluate(&self.input_batch) {
706 Ok(v) => v,
707 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
708 };
709 match col_val {
710 ColumnarValue::Array(a) => {
711 if a.len() != 1 {
712 ConstSimplifyResult::SimplifyRuntimeError(
713 exec_datafusion_err!("Could not evaluate the expression, found a result of length {}", a.len()),
714 expr,
715 )
716 } else if as_list_array(&a).is_ok() {
717 ConstSimplifyResult::Simplified(
718 ScalarValue::List(a.as_list::<i32>().to_owned().into()),
719 metadata,
720 )
721 } else if as_large_list_array(&a).is_ok() {
722 ConstSimplifyResult::Simplified(
723 ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
724 metadata,
725 )
726 } else {
727 match ScalarValue::try_from_array(&a, 0) {
729 Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
730 Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
731 }
732 }
733 }
734 ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
735 }
736 }
737}
738
/// Rewriter that applies the expression simplification rules, consulting
/// `info` for the context (types, nullability, ...) those rules need.
struct Simplifier<'a, S> {
    /// Borrowed simplification context.
    info: &'a S,
}

impl<'a, S> Simplifier<'a, S> {
    /// Wraps a borrowed context in a new simplifier.
    pub fn new(info: &'a S) -> Self {
        Simplifier { info }
    }
}
757
758impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
759 type Node = Expr;
760
761 fn f_down(&mut self, expr: Self::Node) -> Result<Transformed<Expr>> {
762 match expr {
763 Expr::ScalarFunction(ScalarFunction { func, args })
764 if args.iter().any(|arg| matches!(arg, Expr::Lambda(_))) =>
765 {
766 let mut columns_refs = HashSet::new();
776
777 for arg in &args {
778 arg.add_column_refs(&mut columns_refs);
779 }
780
781 let qualified_fields = columns_refs
783 .into_iter()
784 .map(|captured_column| {
785 let expr = Expr::Column(captured_column.clone());
786
787 Ok((
788 captured_column.relation.clone(),
789 Arc::new(Field::new(
790 captured_column.name(),
791 self.info.get_data_type(&expr)?,
792 self.info.nullable(&expr)?,
793 )),
794 ))
795 })
796 .collect::<Result<_>>()?;
797
798 let dfschema =
800 DFSchema::new_with_metadata(qualified_fields, Default::default())?;
801
802 let mut scoped_schemas = func
803 .arguments_schema_from_logical_args(&args, &dfschema)?
804 .into_iter();
805
806 let transformed_args = args
807 .map_elements(|arg| {
808 let scoped_schema = scoped_schemas.next().unwrap();
809
810 let simplify_context =
812 SimplifyContext::new(self.info.execution_props())
813 .with_schema(Arc::new(scoped_schema.into_owned()));
814
815 let mut simplifier = Simplifier::new(&simplify_context);
816
817 arg.rewrite(&mut simplifier)
819 })?
820 .update_data(|args| {
821 Expr::ScalarFunction(ScalarFunction { func, args })
822 });
823
824 Ok(Transformed::new(
825 transformed_args.data,
826 transformed_args.transformed,
827 match transformed_args.tnr {
830 TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => {
831 TreeNodeRecursion::Jump
832 }
833 TreeNodeRecursion::Stop => TreeNodeRecursion::Stop,
834 },
835 ))
836
837 }
839 _ => Ok(Transformed::no(expr)),
841 }
842 }
843
844 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
846 use datafusion_expr::Operator::{
847 And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
848 Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
849 RegexNotIMatch, RegexNotMatch,
850 };
851
852 let info = self.info;
853 Ok(match expr {
854 ref expr @ Expr::BinaryExpr(BinaryExpr {
858 ref left,
859 ref op,
860 ref right,
861 }) if op.returns_null_on_null()
862 && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
863 {
864 Transformed::yes(Expr::Literal(
865 ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
866 None,
867 ))
868 }
869
870 Expr::BinaryExpr(BinaryExpr {
872 left,
873 op: And | Or,
874 right,
875 }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
876
877 Expr::BinaryExpr(BinaryExpr {
885 left,
886 op: Eq,
887 right,
888 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
889 Transformed::yes(match as_bool_lit(&left)? {
890 Some(true) => *right,
891 Some(false) => Expr::Not(right),
892 None => lit_bool_null(),
893 })
894 }
895 Expr::BinaryExpr(BinaryExpr {
899 left,
900 op: Eq,
901 right,
902 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
903 Transformed::yes(match as_bool_lit(&right)? {
904 Some(true) => *left,
905 Some(false) => Expr::Not(left),
906 None => lit_bool_null(),
907 })
908 }
909 Expr::BinaryExpr(BinaryExpr {
914 left,
915 op: Eq,
916 right,
917 }) if (left == right) & !left.is_volatile() => {
918 Transformed::yes(match !info.nullable(&left)? {
919 true => lit(true),
920 false => Expr::BinaryExpr(BinaryExpr {
921 left: Box::new(Expr::IsNotNull(left)),
922 op: Or,
923 right: Box::new(lit_bool_null()),
924 }),
925 })
926 }
927
928 Expr::BinaryExpr(BinaryExpr {
935 left,
936 op: NotEq,
937 right,
938 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
939 Transformed::yes(match as_bool_lit(&left)? {
940 Some(true) => Expr::Not(right),
941 Some(false) => *right,
942 None => lit_bool_null(),
943 })
944 }
945 Expr::BinaryExpr(BinaryExpr {
949 left,
950 op: NotEq,
951 right,
952 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
953 Transformed::yes(match as_bool_lit(&right)? {
954 Some(true) => Expr::Not(left),
955 Some(false) => *left,
956 None => lit_bool_null(),
957 })
958 }
959
960 Expr::BinaryExpr(BinaryExpr {
966 left,
967 op: Or,
968 right: _,
969 }) if is_true(&left) => Transformed::yes(*left),
970 Expr::BinaryExpr(BinaryExpr {
972 left,
973 op: Or,
974 right,
975 }) if is_false(&left) => Transformed::yes(*right),
976 Expr::BinaryExpr(BinaryExpr {
978 left: _,
979 op: Or,
980 right,
981 }) if is_true(&right) => Transformed::yes(*right),
982 Expr::BinaryExpr(BinaryExpr {
984 left,
985 op: Or,
986 right,
987 }) if is_false(&right) => Transformed::yes(*left),
988 Expr::BinaryExpr(BinaryExpr {
990 left,
991 op: Or,
992 right,
993 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
994 Transformed::yes(lit(true))
995 }
996 Expr::BinaryExpr(BinaryExpr {
998 left,
999 op: Or,
1000 right,
1001 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1002 Transformed::yes(lit(true))
1003 }
1004 Expr::BinaryExpr(BinaryExpr {
1006 left,
1007 op: Or,
1008 right,
1009 }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
1010 Expr::BinaryExpr(BinaryExpr {
1012 left,
1013 op: Or,
1014 right,
1015 }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
1016 Expr::BinaryExpr(BinaryExpr {
1018 left,
1019 op: Or,
1020 right,
1021 }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
1022 Expr::BinaryExpr(BinaryExpr {
1024 left,
1025 op: Or,
1026 right,
1027 }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
1028 Expr::BinaryExpr(BinaryExpr {
1031 left,
1032 op: Or,
1033 right,
1034 }) if has_common_conjunction(&left, &right) => {
1035 let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
1036 let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
1037 .partition(|e| lhs.contains(e) && !e.is_volatile());
1038
1039 let new_rhs = rhs.into_iter().reduce(and);
1040 let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
1041 let common_conjunction = common.into_iter().reduce(and).unwrap();
1042
1043 let new_expr = match (new_lhs, new_rhs) {
1044 (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
1045 (_, _) => common_conjunction,
1046 };
1047 Transformed::yes(new_expr)
1048 }
1049
1050 Expr::BinaryExpr(BinaryExpr {
1056 left,
1057 op: And,
1058 right,
1059 }) if is_true(&left) => Transformed::yes(*right),
1060 Expr::BinaryExpr(BinaryExpr {
1062 left,
1063 op: And,
1064 right: _,
1065 }) if is_false(&left) => Transformed::yes(*left),
1066 Expr::BinaryExpr(BinaryExpr {
1068 left,
1069 op: And,
1070 right,
1071 }) if is_true(&right) => Transformed::yes(*left),
1072 Expr::BinaryExpr(BinaryExpr {
1074 left: _,
1075 op: And,
1076 right,
1077 }) if is_false(&right) => Transformed::yes(*right),
1078 Expr::BinaryExpr(BinaryExpr {
1080 left,
1081 op: And,
1082 right,
1083 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1084 Transformed::yes(lit(false))
1085 }
1086 Expr::BinaryExpr(BinaryExpr {
1088 left,
1089 op: And,
1090 right,
1091 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1092 Transformed::yes(lit(false))
1093 }
1094 Expr::BinaryExpr(BinaryExpr {
1096 left,
1097 op: And,
1098 right,
1099 }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1100 Expr::BinaryExpr(BinaryExpr {
1102 left,
1103 op: And,
1104 right,
1105 }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1106 Expr::BinaryExpr(BinaryExpr {
1108 left,
1109 op: And,
1110 right,
1111 }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1112 Expr::BinaryExpr(BinaryExpr {
1114 left,
1115 op: And,
1116 right,
1117 }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1118 Expr::BinaryExpr(BinaryExpr {
1120 left,
1121 op: And,
1122 right,
1123 }) if can_reduce_to_equal_statement(&left, &right) => {
1124 if let Expr::BinaryExpr(BinaryExpr {
1125 left: left_left,
1126 right: left_right,
1127 ..
1128 }) = *left
1129 {
1130 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1131 left: left_left,
1132 op: Eq,
1133 right: left_right,
1134 }))
1135 } else {
1136 return internal_err!("can_reduce_to_equal_statement should only be called with a BinaryExpr");
1137 }
1138 }
1139
1140 Expr::BinaryExpr(BinaryExpr {
1146 left,
1147 op: Multiply,
1148 right,
1149 }) if is_one(&right) => {
1150 simplify_right_is_one_case(info, left, &Multiply, &right)?
1151 }
1152 Expr::BinaryExpr(BinaryExpr {
1154 left,
1155 op: Multiply,
1156 right,
1157 }) if is_one(&left) => {
1158 simplify_right_is_one_case(info, right, &Multiply, &left)?
1160 }
1161
1162 Expr::BinaryExpr(BinaryExpr {
1164 left,
1165 op: Multiply,
1166 right,
1167 }) if !info.nullable(&left)?
1168 && !info.get_data_type(&left)?.is_floating()
1169 && is_zero(&right) =>
1170 {
1171 Transformed::yes(*right)
1172 }
1173 Expr::BinaryExpr(BinaryExpr {
1175 left,
1176 op: Multiply,
1177 right,
1178 }) if !info.nullable(&right)?
1179 && !info.get_data_type(&right)?.is_floating()
1180 && is_zero(&left) =>
1181 {
1182 Transformed::yes(*left)
1183 }
1184
1185 Expr::BinaryExpr(BinaryExpr {
1191 left,
1192 op: Divide,
1193 right,
1194 }) if is_one(&right) => {
1195 simplify_right_is_one_case(info, left, &Divide, &right)?
1196 }
1197
1198 Expr::BinaryExpr(BinaryExpr {
1204 left,
1205 op: Modulo,
1206 right,
1207 }) if !info.nullable(&left)?
1208 && !info.get_data_type(&left)?.is_floating()
1209 && is_one(&right) =>
1210 {
1211 Transformed::yes(Expr::Literal(
1212 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1213 None,
1214 ))
1215 }
1216
1217 Expr::BinaryExpr(BinaryExpr {
1223 left,
1224 op: BitwiseAnd,
1225 right,
1226 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1227
1228 Expr::BinaryExpr(BinaryExpr {
1230 left,
1231 op: BitwiseAnd,
1232 right,
1233 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1234
1235 Expr::BinaryExpr(BinaryExpr {
1237 left,
1238 op: BitwiseAnd,
1239 right,
1240 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1241 Transformed::yes(Expr::Literal(
1242 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1243 None,
1244 ))
1245 }
1246
1247 Expr::BinaryExpr(BinaryExpr {
1249 left,
1250 op: BitwiseAnd,
1251 right,
1252 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1253 Transformed::yes(Expr::Literal(
1254 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1255 None,
1256 ))
1257 }
1258
1259 Expr::BinaryExpr(BinaryExpr {
1261 left,
1262 op: BitwiseAnd,
1263 right,
1264 }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1265
1266 Expr::BinaryExpr(BinaryExpr {
1268 left,
1269 op: BitwiseAnd,
1270 right,
1271 }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1272
1273 Expr::BinaryExpr(BinaryExpr {
1275 left,
1276 op: BitwiseAnd,
1277 right,
1278 }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1279 Transformed::yes(*left)
1280 }
1281
1282 Expr::BinaryExpr(BinaryExpr {
1284 left,
1285 op: BitwiseAnd,
1286 right,
1287 }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1288 Transformed::yes(*right)
1289 }
1290
1291 Expr::BinaryExpr(BinaryExpr {
1297 left,
1298 op: BitwiseOr,
1299 right,
1300 }) if is_zero(&right) => Transformed::yes(*left),
1301
1302 Expr::BinaryExpr(BinaryExpr {
1304 left,
1305 op: BitwiseOr,
1306 right,
1307 }) if is_zero(&left) => Transformed::yes(*right),
1308
1309 Expr::BinaryExpr(BinaryExpr {
1311 left,
1312 op: BitwiseOr,
1313 right,
1314 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1315 Transformed::yes(Expr::Literal(
1316 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1317 None,
1318 ))
1319 }
1320
1321 Expr::BinaryExpr(BinaryExpr {
1323 left,
1324 op: BitwiseOr,
1325 right,
1326 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1327 Transformed::yes(Expr::Literal(
1328 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1329 None,
1330 ))
1331 }
1332
1333 Expr::BinaryExpr(BinaryExpr {
1335 left,
1336 op: BitwiseOr,
1337 right,
1338 }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1339
1340 Expr::BinaryExpr(BinaryExpr {
1342 left,
1343 op: BitwiseOr,
1344 right,
1345 }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1346
1347 Expr::BinaryExpr(BinaryExpr {
1349 left,
1350 op: BitwiseOr,
1351 right,
1352 }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1353 Transformed::yes(*left)
1354 }
1355
1356 Expr::BinaryExpr(BinaryExpr {
1358 left,
1359 op: BitwiseOr,
1360 right,
1361 }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1362 Transformed::yes(*right)
1363 }
1364
1365 Expr::BinaryExpr(BinaryExpr {
1371 left,
1372 op: BitwiseXor,
1373 right,
1374 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1375
1376 Expr::BinaryExpr(BinaryExpr {
1378 left,
1379 op: BitwiseXor,
1380 right,
1381 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1382
1383 Expr::BinaryExpr(BinaryExpr {
1385 left,
1386 op: BitwiseXor,
1387 right,
1388 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1389 Transformed::yes(Expr::Literal(
1390 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1391 None,
1392 ))
1393 }
1394
1395 Expr::BinaryExpr(BinaryExpr {
1397 left,
1398 op: BitwiseXor,
1399 right,
1400 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1401 Transformed::yes(Expr::Literal(
1402 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1403 None,
1404 ))
1405 }
1406
1407 Expr::BinaryExpr(BinaryExpr {
1409 left,
1410 op: BitwiseXor,
1411 right,
1412 }) if expr_contains(&left, &right, BitwiseXor) => {
1413 let expr = delete_xor_in_complex_expr(&left, &right, false);
1414 Transformed::yes(if expr == *right {
1415 Expr::Literal(
1416 ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1417 None,
1418 )
1419 } else {
1420 expr
1421 })
1422 }
1423
1424 Expr::BinaryExpr(BinaryExpr {
1426 left,
1427 op: BitwiseXor,
1428 right,
1429 }) if expr_contains(&right, &left, BitwiseXor) => {
1430 let expr = delete_xor_in_complex_expr(&right, &left, true);
1431 Transformed::yes(if expr == *left {
1432 Expr::Literal(
1433 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1434 None,
1435 )
1436 } else {
1437 expr
1438 })
1439 }
1440
1441 Expr::BinaryExpr(BinaryExpr {
1447 left,
1448 op: BitwiseShiftRight,
1449 right,
1450 }) if is_zero(&right) => Transformed::yes(*left),
1451
1452 Expr::BinaryExpr(BinaryExpr {
1458 left,
1459 op: BitwiseShiftLeft,
1460 right,
1461 }) if is_zero(&right) => Transformed::yes(*left),
1462
1463 Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1467
1468 Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1472
1473 Expr::BinaryExpr(BinaryExpr {
1481 left,
1482 op: op @ (Eq | NotEq),
1483 right,
1484 }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1485 let case = into_case(*left)?;
1486 Transformed::yes(Expr::Case(Case {
1487 expr: None,
1488 when_then_expr: case
1489 .when_then_expr
1490 .into_iter()
1491 .map(|(when, then)| {
1492 (
1493 when,
1494 Box::new(Expr::BinaryExpr(BinaryExpr {
1495 left: then,
1496 op,
1497 right: right.clone(),
1498 })),
1499 )
1500 })
1501 .collect(),
1502 else_expr: case.else_expr.map(|els| {
1503 Box::new(Expr::BinaryExpr(BinaryExpr {
1504 left: els,
1505 op,
1506 right,
1507 }))
1508 }),
1509 }))
1510 }
1511
1512 Expr::Case(Case {
1518 expr: None,
1519 when_then_expr,
1520 mut else_expr,
1521 }) if when_then_expr
1522 .iter()
1523 .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1524 {
1525 let out_type = info.get_data_type(&when_then_expr[0].1)?;
1526 let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1527
1528 for (when, then) in when_then_expr.into_iter() {
1529 if is_true(when.as_ref()) {
1530 else_expr = Some(then);
1533 break;
1534 } else if !is_false(when.as_ref()) {
1535 new_when_then_expr.push((when, then));
1536 }
1537 }
1539
1540 if new_when_then_expr.is_empty() {
1542 if let Some(else_expr) = else_expr {
1544 return Ok(Transformed::yes(*else_expr));
1545 } else {
1547 let null =
1548 Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1549 return Ok(Transformed::yes(null));
1550 }
1551 }
1552
1553 Transformed::yes(Expr::Case(Case {
1554 expr: None,
1555 when_then_expr: new_when_then_expr,
1556 else_expr,
1557 }))
1558 }
1559
1560 Expr::Case(Case {
1572 expr: None,
1573 when_then_expr,
1574 else_expr,
1575 }) if !when_then_expr.is_empty()
1576 && (when_then_expr.len() < 3 || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1580 && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1581 && info.is_boolean_type(&when_then_expr[0].1)? =>
1582 {
1583 let mut filter_expr = lit(false);
1585 let mut out_expr = lit(false);
1587
1588 for (when, then) in when_then_expr {
1589 let when = is_exactly_true(*when, info)?;
1590 let case_expr =
1591 when.clone().and(filter_expr.clone().not()).and(*then);
1592
1593 out_expr = out_expr.or(case_expr);
1594 filter_expr = filter_expr.or(when);
1595 }
1596
1597 let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1598 let case_expr = filter_expr.not().and(else_expr);
1599 out_expr = out_expr.or(case_expr);
1600
1601 out_expr.rewrite(self)?
1603 }
1604 Expr::Case(Case {
1625 expr: None,
1626 when_then_expr,
1627 else_expr,
1628 }) if !when_then_expr.is_empty()
1629 && when_then_expr
1630 .iter()
1631 .all(|(_, then)| is_bool_lit(then)) && when_then_expr
1634 .iter()
1635 .filter(|(_, then)| is_false(then))
1636 .count()
1637 < 3
1638 && else_expr.as_deref().is_none_or(is_bool_lit) =>
1639 {
1640 Transformed::yes(
1641 Expr::Case(Case {
1642 expr: None,
1643 when_then_expr: when_then_expr
1644 .into_iter()
1645 .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1646 .collect(),
1647 else_expr: else_expr
1648 .map(|else_expr| Box::new(Expr::Not(else_expr))),
1649 })
1650 .not(),
1651 )
1652 }
1653 Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1654 match udf.simplify(args, info)? {
1655 ExprSimplifyResult::Original(args) => {
1656 Transformed::no(Expr::ScalarFunction(ScalarFunction {
1657 func: udf,
1658 args,
1659 }))
1660 }
1661 ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1662 }
1663 }
1664
1665 Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1666 ref func,
1667 ..
1668 }) => match (func.simplify(), expr) {
1669 (Some(simplify_function), Expr::AggregateFunction(af)) => {
1670 Transformed::yes(simplify_function(af, info)?)
1671 }
1672 (_, expr) => Transformed::no(expr),
1673 },
1674
1675 Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1676 (Some(simplify_function), Expr::WindowFunction(wf)) => {
1677 Transformed::yes(simplify_function(*wf, info)?)
1678 }
1679 (_, expr) => Transformed::no(expr),
1680 },
1681
1682 Expr::Between(between) => Transformed::yes(if between.negated {
1689 let l = *between.expr.clone();
1690 let r = *between.expr;
1691 or(l.lt(*between.low), r.gt(*between.high))
1692 } else {
1693 and(
1694 between.expr.clone().gt_eq(*between.low),
1695 between.expr.lt_eq(*between.high),
1696 )
1697 }),
1698
1699 Expr::BinaryExpr(BinaryExpr {
1703 left,
1704 op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1705 right,
1706 }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
1707
1708 Expr::Like(like) => {
1710 let escape_char = like.escape_char.unwrap_or('\\');
1712 match as_string_scalar(&like.pattern) {
1713 Some((data_type, pattern_str)) => {
1714 match pattern_str {
1715 None => return Ok(Transformed::yes(lit_bool_null())),
1716 Some(pattern_str) if pattern_str == "%" => {
1717 let result_for_non_null = lit(!like.negated);
1724 Transformed::yes(if !info.nullable(&like.expr)? {
1725 result_for_non_null
1726 } else {
1727 Expr::Case(Case {
1728 expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1729 when_then_expr: vec![(
1730 Box::new(lit(true)),
1731 Box::new(result_for_non_null),
1732 )],
1733 else_expr: None,
1734 })
1735 })
1736 }
1737 Some(pattern_str)
1738 if pattern_str.contains("%%")
1739 && !pattern_str.contains(escape_char) =>
1740 {
1741 let simplified_pattern = Regex::new("%%+")
1744 .unwrap()
1745 .replace_all(pattern_str, "%")
1746 .to_string();
1747 Transformed::yes(Expr::Like(Like {
1748 pattern: Box::new(to_string_scalar(
1749 data_type,
1750 Some(simplified_pattern),
1751 )),
1752 ..like
1753 }))
1754 }
1755 Some(pattern_str)
1756 if !like.case_insensitive
1757 && !pattern_str
1758 .contains(['%', '_', escape_char].as_ref()) =>
1759 {
1760 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1763 left: like.expr.clone(),
1764 op: if like.negated { NotEq } else { Eq },
1765 right: like.pattern.clone(),
1766 }))
1767 }
1768
1769 Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1770 }
1771 }
1772 None => Transformed::no(Expr::Like(like)),
1773 }
1774 }
1775
1776 Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1778 if !info.nullable(&expr)? =>
1779 {
1780 Transformed::yes(lit(true))
1781 }
1782
1783 Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1785 Transformed::yes(lit(false))
1786 }
1787
1788 Expr::InList(InList {
1791 expr: _,
1792 list,
1793 negated,
1794 }) if list.is_empty() => Transformed::yes(lit(negated)),
1795
1796 Expr::InList(InList {
1799 expr,
1800 list,
1801 negated: _,
1802 }) if is_null(expr.as_ref()) && !list.is_empty() => {
1803 Transformed::yes(lit_bool_null())
1804 }
1805
1806 Expr::InList(InList {
1808 expr,
1809 mut list,
1810 negated,
1811 }) if list.len() == 1
1812 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1813 {
1814 let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1815 unreachable!()
1816 };
1817
1818 Transformed::yes(Expr::InSubquery(InSubquery::new(
1819 expr, subquery, negated,
1820 )))
1821 }
1822
1823 Expr::BinaryExpr(BinaryExpr {
1827 left,
1828 op: Or,
1829 right,
1830 }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1831 let lhs = to_inlist(*left).unwrap();
1832 let rhs = to_inlist(*right).unwrap();
1833 let mut seen: HashSet<Expr> = HashSet::new();
1834 let list = lhs
1835 .list
1836 .into_iter()
1837 .chain(rhs.list)
1838 .filter(|e| seen.insert(e.to_owned()))
1839 .collect::<Vec<_>>();
1840
1841 let merged_inlist = InList {
1842 expr: lhs.expr,
1843 list,
1844 negated: false,
1845 };
1846
1847 Transformed::yes(Expr::InList(merged_inlist))
1848 }
1849
1850 Expr::BinaryExpr(BinaryExpr {
1867 left,
1868 op: And,
1869 right,
1870 }) if are_inlist_and_eq_and_match_neg(
1871 left.as_ref(),
1872 right.as_ref(),
1873 false,
1874 false,
1875 ) =>
1876 {
1877 match (*left, *right) {
1878 (Expr::InList(l1), Expr::InList(l2)) => {
1879 return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1880 }
1881 _ => unreachable!(),
1883 }
1884 }
1885
1886 Expr::BinaryExpr(BinaryExpr {
1887 left,
1888 op: And,
1889 right,
1890 }) if are_inlist_and_eq_and_match_neg(
1891 left.as_ref(),
1892 right.as_ref(),
1893 true,
1894 true,
1895 ) =>
1896 {
1897 match (*left, *right) {
1898 (Expr::InList(l1), Expr::InList(l2)) => {
1899 return inlist_union(l1, l2, true).map(Transformed::yes);
1900 }
1901 _ => unreachable!(),
1903 }
1904 }
1905
1906 Expr::BinaryExpr(BinaryExpr {
1907 left,
1908 op: And,
1909 right,
1910 }) if are_inlist_and_eq_and_match_neg(
1911 left.as_ref(),
1912 right.as_ref(),
1913 false,
1914 true,
1915 ) =>
1916 {
1917 match (*left, *right) {
1918 (Expr::InList(l1), Expr::InList(l2)) => {
1919 return inlist_except(l1, &l2).map(Transformed::yes);
1920 }
1921 _ => unreachable!(),
1923 }
1924 }
1925
1926 Expr::BinaryExpr(BinaryExpr {
1927 left,
1928 op: And,
1929 right,
1930 }) if are_inlist_and_eq_and_match_neg(
1931 left.as_ref(),
1932 right.as_ref(),
1933 true,
1934 false,
1935 ) =>
1936 {
1937 match (*left, *right) {
1938 (Expr::InList(l1), Expr::InList(l2)) => {
1939 return inlist_except(l2, &l1).map(Transformed::yes);
1940 }
1941 _ => unreachable!(),
1943 }
1944 }
1945
1946 Expr::BinaryExpr(BinaryExpr {
1947 left,
1948 op: Or,
1949 right,
1950 }) if are_inlist_and_eq_and_match_neg(
1951 left.as_ref(),
1952 right.as_ref(),
1953 true,
1954 true,
1955 ) =>
1956 {
1957 match (*left, *right) {
1958 (Expr::InList(l1), Expr::InList(l2)) => {
1959 return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1960 }
1961 _ => unreachable!(),
1963 }
1964 }
1965
1966 Expr::BinaryExpr(BinaryExpr { left, op, right })
1973 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1974 info, &left, op, &right,
1975 ) && op.supports_propagation() =>
1976 {
1977 unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1978 }
1979 Expr::BinaryExpr(BinaryExpr { left, op, right })
1983 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1984 info, &right, op, &left,
1985 ) && op.supports_propagation()
1986 && op.swap().is_some() =>
1987 {
1988 unwrap_cast_in_comparison_for_binary(
1989 info,
1990 *right,
1991 *left,
1992 op.swap().unwrap(),
1993 )?
1994 }
1995 Expr::InList(InList {
1998 expr: mut left,
1999 list,
2000 negated,
2001 }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
2002 info, &left, &list,
2003 ) =>
2004 {
2005 let (Expr::TryCast(TryCast {
2006 expr: left_expr, ..
2007 })
2008 | Expr::Cast(Cast {
2009 expr: left_expr, ..
2010 })) = left.as_mut()
2011 else {
2012 return internal_err!("Expect cast expr, but got {:?}", left)?;
2013 };
2014
2015 let expr_type = info.get_data_type(left_expr)?;
2016 let right_exprs = list
2017 .into_iter()
2018 .map(|right| {
2019 match right {
2020 Expr::Literal(right_lit_value, _) => {
2021 let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
2024 internal_err!(
2025 "Can't cast the list expr {:?} to type {}",
2026 right_lit_value, &expr_type
2027 )?
2028 };
2029 Ok(lit(value))
2030 }
2031 other_expr => internal_err!(
2032 "Only support literal expr to optimize, but the expr is {:?}",
2033 &other_expr
2034 ),
2035 }
2036 })
2037 .collect::<Result<Vec<_>>>()?;
2038
2039 Transformed::yes(Expr::InList(InList {
2040 expr: std::mem::take(left_expr),
2041 list: right_exprs,
2042 negated,
2043 }))
2044 }
2045
2046 expr => Transformed::no(expr),
2048 })
2049 }
2050}
2051
2052fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
2053 match expr {
2054 Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
2055 Expr::Literal(ScalarValue::LargeUtf8(s), _) => Some((DataType::LargeUtf8, s)),
2056 Expr::Literal(ScalarValue::Utf8View(s), _) => Some((DataType::Utf8View, s)),
2057 _ => None,
2058 }
2059}
2060
2061fn to_string_scalar(data_type: DataType, value: Option<String>) -> Expr {
2062 match data_type {
2063 DataType::Utf8 => Expr::Literal(ScalarValue::Utf8(value), None),
2064 DataType::LargeUtf8 => Expr::Literal(ScalarValue::LargeUtf8(value), None),
2065 DataType::Utf8View => Expr::Literal(ScalarValue::Utf8View(value), None),
2066 _ => unreachable!(),
2067 }
2068}
2069
2070fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2071 let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2072 iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2073}
2074
2075fn are_inlist_and_eq_and_match_neg(
2077 left: &Expr,
2078 right: &Expr,
2079 is_left_neg: bool,
2080 is_right_neg: bool,
2081) -> bool {
2082 match (left, right) {
2083 (Expr::InList(l), Expr::InList(r)) => {
2084 l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2085 }
2086 _ => false,
2087 }
2088}
2089
2090fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2092 let left = as_inlist(left);
2093 let right = as_inlist(right);
2094 if let (Some(lhs), Some(rhs)) = (left, right) {
2095 matches!(lhs.expr.as_ref(), Expr::Column(_))
2096 && matches!(rhs.expr.as_ref(), Expr::Column(_))
2097 && lhs.expr == rhs.expr
2098 && !lhs.negated
2099 && !rhs.negated
2100 } else {
2101 false
2102 }
2103}
2104
2105fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2107 match expr {
2108 Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2109 Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2110 match (left.as_ref(), right.as_ref()) {
2111 (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2112 expr: left.clone(),
2113 list: vec![*right.clone()],
2114 negated: false,
2115 })),
2116 (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2117 expr: right.clone(),
2118 list: vec![*left.clone()],
2119 negated: false,
2120 })),
2121 _ => None,
2122 }
2123 }
2124 _ => None,
2125 }
2126}
2127
2128fn to_inlist(expr: Expr) -> Option<InList> {
2129 match expr {
2130 Expr::InList(inlist) => Some(inlist),
2131 Expr::BinaryExpr(BinaryExpr {
2132 left,
2133 op: Operator::Eq,
2134 right,
2135 }) => match (left.as_ref(), right.as_ref()) {
2136 (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2137 expr: left,
2138 list: vec![*right],
2139 negated: false,
2140 }),
2141 (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2142 expr: right,
2143 list: vec![*left],
2144 negated: false,
2145 }),
2146 _ => None,
2147 },
2148 _ => None,
2149 }
2150}
2151
2152fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2155 let l1_items: HashSet<_> = l1.list.iter().collect();
2157
2158 let keep_l2: Vec<_> = l2
2160 .list
2161 .into_iter()
2162 .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2163 .collect();
2164
2165 l1.list.extend(keep_l2);
2166 l1.negated = negated;
2167 Ok(Expr::InList(l1))
2168}
2169
2170fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2173 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2174
2175 l1.list.retain(|e| l2_items.contains(e));
2177
2178 if l1.list.is_empty() {
2181 return Ok(lit(negated));
2182 }
2183 Ok(Expr::InList(l1))
2184}
2185
2186fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2189 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2190
2191 l1.list.retain(|e| !l2_items.contains(e));
2193
2194 if l1.list.is_empty() {
2195 return Ok(lit(false));
2196 }
2197 Ok(Expr::InList(l1))
2198}
2199
2200fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result<Expr> {
2202 if !info.nullable(&expr)? {
2203 Ok(expr)
2204 } else {
2205 Ok(Expr::BinaryExpr(BinaryExpr {
2206 left: Box::new(expr),
2207 op: Operator::IsNotDistinctFrom,
2208 right: Box::new(lit(true)),
2209 }))
2210 }
2211}
2212
2213fn simplify_right_is_one_case<S: SimplifyInfo>(
2218 info: &S,
2219 left: Box<Expr>,
2220 op: &Operator,
2221 right: &Expr,
2222) -> Result<Transformed<Expr>> {
2223 let left_type = info.get_data_type(&left)?;
2225 let right_type = info.get_data_type(right)?;
2226 match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2227 Ok(result_type) => {
2228 if left_type != result_type {
2230 Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2231 } else {
2232 Ok(Transformed::yes(*left))
2233 }
2234 }
2235 Err(_) => Ok(Transformed::yes(*left)),
2236 }
2237}
2238
2239#[cfg(test)]
2240mod tests {
2241 use super::*;
2242 use crate::simplify_expressions::SimplifyContext;
2243 use crate::test::test_table_scan_with_name;
2244 use arrow::datatypes::FieldRef;
2245 use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema};
2246 use datafusion_expr::{
2247 expr::WindowFunction,
2248 function::{
2249 AccumulatorArgs, AggregateFunctionSimplification,
2250 WindowFunctionSimplification,
2251 },
2252 interval_arithmetic::Interval,
2253 *,
2254 };
2255 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2256 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2257 use datafusion_physical_expr::PhysicalExpr;
2258 use std::hash::Hash;
2259 use std::sync::LazyLock;
2260 use std::{
2261 collections::HashMap,
2262 ops::{BitAnd, BitOr, BitXor},
2263 sync::Arc,
2264 };
2265
#[test]
fn api_basic() {
    // Builds a simplifier over the shared test schema and checks that the
    // constant expression `1 + 2` is simplified to the literal `3`.
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(test_schema());
    let simplifier = ExprSimplifier::new(context);

    let simplified = simplifier.simplify(lit(1) + lit(2)).unwrap();
    assert_eq!(lit(3), simplified);
}
2279
#[test]
fn basic_coercion() {
    // `(1i64 + 2i32) < i` is first coerced against the schema and then
    // simplified; the expected outcome is `3i64 < i`.
    let schema = test_schema();
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(Arc::clone(&schema));
    let simplifier = ExprSimplifier::new(context);

    let uncoerced = (lit(1i64) + lit(2i32)).lt(col("i"));
    let expected = lit(3i64).lt(col("i"));

    let coerced = simplifier.coerce(uncoerced, &schema).unwrap();

    assert_eq!(expected, simplifier.simplify(coerced).unwrap());
}
2298
2299 fn test_schema() -> DFSchemaRef {
2300 static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2301 Schema::new(vec![
2302 Field::new("i", DataType::Int64, false),
2303 Field::new("b", DataType::Boolean, true),
2304 ])
2305 .to_dfschema_ref()
2306 .unwrap()
2307 });
2308 Arc::clone(&TEST_SCHEMA)
2309 }
2310
#[test]
fn simplify_and_constant_prop() {
    // `i * (1 - 1) > 0` is expected to simplify all the way to `false`.
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(test_schema());
    let simplifier = ExprSimplifier::new(context);

    let input = (col("i") * (lit(1) - lit(1))).gt(lit(0));
    let simplified = simplifier.simplify(input).unwrap();
    assert_eq!(lit(false), simplified);
}
2323
#[test]
fn simplify_and_constant_prop_with_case() {
    // CASE
    //   WHEN i > 5 AND false THEN i > 5
    //   WHEN i < 5 AND true  THEN i < 5
    //   ELSE false
    // END
    // is expected to simplify to `i < 5`.
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(test_schema());
    let simplifier = ExprSimplifier::new(context);

    let first_branch = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)));
    let case_expr = first_branch
        .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
        .otherwise(lit(false))
        .unwrap();

    let expected = col("i").lt(lit(5));
    assert_eq!(expected, simplifier.simplify(case_expr).unwrap());
}
2344
#[test]
fn test_simplify_canonicalize() {
    // Checks how operand order is normalised so that equivalent predicates
    // written in different orders are recognised as duplicates.
    {
        // `1 < c2 AND c2 > 1` collapses to `c2 > 1`.
        let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
        let expected = col("c2").gt(lit(1));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 < c2 AND c2 > c1` collapses to `c2 > c1`.
        let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
        let expected = col("c2").gt(col("c1"));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 = 1 AND 1 = c1 AND c1 = 3`: the mirrored equality is dropped.
        let expr = col("c1")
            .eq(lit(1))
            .and(lit(1).eq(col("c1")))
            .and(col("c1").eq(lit(3)));
        let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 = c2 AND c1 > 5 AND c2 = c1` becomes `c2 = c1 AND c1 > 5`.
        let expr = col("c1")
            .eq(col("c2"))
            .and(col("c1").gt(lit(5)))
            .and(col("c2").eq(col("c1")));
        let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 = 1 AND (c2 > 3 OR 3 < c2)` becomes `c1 = 1 AND c2 > 3`.
        let expr = col("c1")
            .eq(lit(1))
            .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
        let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 < 5 AND c1 >= 5` is left unchanged.
        // (This case used to appear twice, verbatim; the duplicate was removed.)
        let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
        let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
        assert_eq!(simplify(expr), expected);
    }
    {
        // `c1 > c2 AND c1 > c2` collapses to `c2 < c1`.
        let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
        let expected = col("c2").lt(col("c1"));
        assert_eq!(simplify(expr), expected);
    }
}
2400
#[test]
fn test_simplify_eq_not_self() {
    // `c2 = c2` is expected to become `c2 IS NOT NULL OR NULL`, while
    // `c2_non_null = c2_non_null` is expected to become `true`.
    let cases = [
        (
            col("c2").eq(col("c2")),
            col("c2").is_not_null().or(lit_bool_null()),
        ),
        (col("c2_non_null").eq(col("c2_non_null")), lit(true)),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
2415
#[test]
fn test_simplify_or_true() {
    // `c2 OR true` and `true OR c2` both simplify to `true`.
    let expected = lit(true);
    let inputs = [col("c2").or(lit(true)), lit(true).or(col("c2"))];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2425
#[test]
fn test_simplify_or_false() {
    // `false OR c2` and `c2 OR false` both simplify to `c2`.
    let expected = col("c2");
    let inputs = [lit(false).or(col("c2")), col("c2").or(lit(false))];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2435
#[test]
fn test_simplify_or_same() {
    // `c2 OR c2` simplifies to `c2`.
    let duplicated = col("c2").or(col("c2"));

    assert_eq!(simplify(duplicated), col("c2"));
}
2443
#[test]
fn test_simplify_or_not_self() {
    // `c2_non_null OR NOT c2_non_null`, in either operand order, simplifies
    // to `true`.
    let expected = lit(true);
    let inputs = [
        col("c2_non_null").or(col("c2_non_null").not()),
        col("c2_non_null").not().or(col("c2_non_null")),
    ];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2455
#[test]
fn test_simplify_and_false() {
    // `false AND c2` and `c2 AND false` both simplify to `false`.
    let expected = lit(false);
    let inputs = [lit(false).and(col("c2")), col("c2").and(lit(false))];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2465
#[test]
fn test_simplify_and_same() {
    // `c2 AND c2` simplifies to `c2`.
    let duplicated = col("c2").and(col("c2"));

    assert_eq!(simplify(duplicated), col("c2"));
}
2473
#[test]
fn test_simplify_and_true() {
    // `true AND c2` and `c2 AND true` both simplify to `c2`.
    let expected = col("c2");
    let inputs = [lit(true).and(col("c2")), col("c2").and(lit(true))];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2483
#[test]
fn test_simplify_and_not_self() {
    // `c2_non_null AND NOT c2_non_null`, in either operand order, simplifies
    // to `false`.
    let expected = lit(false);
    let inputs = [
        col("c2_non_null").and(col("c2_non_null").not()),
        col("c2_non_null").not().and(col("c2_non_null")),
    ];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2495
#[test]
fn test_simplify_multiply_by_one() {
    // Multiplying `c2` by the integer literal 1 (either side), or by a
    // Decimal128 literal with unscaled value 10000000000 and scale 10,
    // simplifies to `c2`.
    let expected = col("c2");
    let decimal_p38 = lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
    let decimal_p31 = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));

    let inputs = [
        col("c2") * lit(1),
        lit(1) * col("c2"),
        col("c2") * decimal_p38,
        decimal_p31 * col("c2"),
    ];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2511
#[test]
fn test_simplify_multiply_by_null() {
    // `c3 * NULL` and `NULL * c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") * null.clone(), null.clone() * col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2526
#[test]
fn test_simplify_multiply_by_zero() {
    // With `c2`, multiplication by 0 is left unchanged on either side.
    let unchanged = [col("c2") * lit(0), lit(0) * col("c2")];
    for input in unchanged {
        assert_eq!(simplify(input.clone()), input);
    }

    // With `c2_non_null`, multiplication by 0 folds to the literal 0.
    let folded = [lit(0) * col("c2_non_null"), col("c2_non_null") * lit(0)];
    for input in folded {
        assert_eq!(simplify(input), lit(0));
    }

    // The same folding applies to a Decimal128 zero, on either side.
    let decimal_zero = || lit(ScalarValue::Decimal128(Some(0), 31, 10));
    let decimal_inputs = [
        col("c2_non_null") * decimal_zero(),
        binary_expr(decimal_zero(), Operator::Multiply, col("c2_non_null")),
    ];
    for input in decimal_inputs {
        assert_eq!(simplify(input), decimal_zero());
    }
}
2565
#[test]
fn test_simplify_divide_by_one() {
    // `c2 / 1` simplifies to `c2`, both for the integer literal and for a
    // Decimal128 literal with unscaled value 10000000000 and scale 10.
    let expected = col("c2");
    let inputs = [
        binary_expr(col("c2"), Operator::Divide, lit(1)),
        col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)),
    ];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2574
#[test]
fn test_simplify_divide_null() {
    // `c3 / NULL` and `NULL / c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") / null.clone(), null.clone() / col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2589
#[test]
fn test_simplify_divide_by_same() {
    // `c2 / c2` is expected to be left unchanged.
    let input = col("c2") / col("c2");
    let unchanged = input.clone();

    assert_eq!(simplify(input), unchanged);
}
2598
#[test]
fn test_simplify_modulo_by_null() {
    // `c3 % NULL` and `NULL % c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") % null.clone(), null.clone() % col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2613
#[test]
fn test_simplify_modulo_by_one() {
    // `c2 % 1` is expected to be left unchanged.
    let input = col("c2") % lit(1);
    let unchanged = input.clone();

    assert_eq!(simplify(input), unchanged);
}
2622
#[test]
fn test_simplify_divide_zero_by_zero() {
    // The constant expression `0 / 0` is expected to be left unchanged.
    let input = lit(0) / lit(0);
    let unchanged = input.clone();

    assert_eq!(simplify(input), unchanged);
}
2632
#[test]
fn test_simplify_divide_by_zero() {
    // `c2_non_null / 0` is expected to be left unchanged.
    let input = col("c2_non_null") / lit(0);
    let unchanged = input.clone();

    assert_eq!(simplify(input), unchanged);
}
2642
#[test]
fn test_simplify_modulo_by_one_non_null() {
    // `c3_non_null % 1` simplifies to the Int64 literal 0, both for the
    // integer literal and for a Decimal128 literal with unscaled value
    // 10000000000 and scale 10.
    let expected = lit(0_i64);
    let inputs = [
        col("c3_non_null") % lit(1),
        col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)),
    ];

    for input in inputs {
        assert_eq!(simplify(input), expected);
    }
}
2652
#[test]
fn test_simplify_bitwise_xor_by_null() {
    // `c3 ^ NULL` and `NULL ^ c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") ^ null.clone(), null.clone() ^ col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2667
#[test]
fn test_simplify_bitwise_shift_right_by_null() {
    // `c3 >> NULL` and `NULL >> c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") >> null.clone(), null.clone() >> col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2682
#[test]
fn test_simplify_bitwise_shift_left_by_null() {
    // `c3 << NULL` and `NULL << c3` both simplify to the Int64 NULL literal.
    let null = lit(ScalarValue::Int64(None));
    let inputs = [col("c3") << null.clone(), null.clone() << col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2697
#[test]
fn test_simplify_bitwise_and_by_zero() {
    // `c2_non_null & 0` and `0 & c2_non_null` both simplify to the literal 0.
    let inputs = [col("c2_non_null") & lit(0), lit(0) & col("c2_non_null")];

    for input in inputs {
        assert_eq!(simplify(input), lit(0));
    }
}
2711
#[test]
fn test_simplify_bitwise_or_by_zero() {
    // `c2_non_null | 0` and `0 | c2_non_null` both simplify to `c2_non_null`.
    let inputs = [col("c2_non_null") | lit(0), lit(0) | col("c2_non_null")];

    for input in inputs {
        assert_eq!(simplify(input), col("c2_non_null"));
    }
}
2725
#[test]
fn test_simplify_bitwise_xor_by_zero() {
    // `c2_non_null ^ 0` and `0 ^ c2_non_null` both simplify to `c2_non_null`.
    let inputs = [col("c2_non_null") ^ lit(0), lit(0) ^ col("c2_non_null")];

    for input in inputs {
        assert_eq!(simplify(input), col("c2_non_null"));
    }
}
2739
#[test]
fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
    // `c2_non_null >> 0` simplifies to `c2_non_null`.
    let shifted = col("c2_non_null") >> lit(0);

    assert_eq!(simplify(shifted), col("c2_non_null"));
}
2748
#[test]
fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
    // `c2_non_null << 0` simplifies to `c2_non_null`.
    let shifted = col("c2_non_null") << lit(0);

    assert_eq!(simplify(shifted), col("c2_non_null"));
}
2757
#[test]
fn test_simplify_bitwise_and_by_null() {
    // `c3 & NULL` and `NULL & c3` both simplify to the Int64 NULL literal.
    let null = Expr::Literal(ScalarValue::Int64(None), None);
    let inputs = [col("c3") & null.clone(), null.clone() & col("c3")];

    for input in inputs {
        assert_eq!(simplify(input), null);
    }
}
2772
#[test]
fn test_simplify_composed_bitwise_and() {
    // With A = `c2 > 5` and B = `c1 < 6`:
    //   (A & B) & A  -->  A & B
    //   A & (A & B)  -->  A & B
    let a = || col("c2").gt(lit(5));
    let b = || col("c1").lt(lit(6));
    let a_and_b = || bitwise_and(a(), b());

    let repeated_on_right = bitwise_and(a_and_b(), a());
    assert_eq!(simplify(repeated_on_right), a_and_b());

    let repeated_on_left = bitwise_and(a(), a_and_b());
    assert_eq!(simplify(repeated_on_left), a_and_b());
}
2794
#[test]
fn test_simplify_composed_bitwise_or() {
    // With A = `c2 > 5` and B = `c1 < 6`:
    //   (A | B) | A  -->  A | B
    //   A | (A | B)  -->  A | B
    let a = || col("c2").gt(lit(5));
    let b = || col("c1").lt(lit(6));
    let a_or_b = || bitwise_or(a(), b());

    let repeated_on_right = bitwise_or(a_or_b(), a());
    assert_eq!(simplify(repeated_on_right), a_or_b());

    let repeated_on_left = bitwise_or(a(), a_or_b());
    assert_eq!(simplify(repeated_on_left), a_or_b());
}
2817
#[test]
fn test_simplify_composed_bitwise_xor() {
    // Shorthand used below: OR = `c2 | c1`, AND = `c1 & c2`.
    let c2 = || col("c2");
    let or_term = || bitwise_or(col("c2"), col("c1"));
    let and_term = || bitwise_and(col("c1"), col("c2"));

    // c2 ^ ((c2 ^ OR) ^ AND)  -->  OR ^ AND
    let expr = bitwise_xor(
        c2(),
        bitwise_xor(bitwise_xor(c2(), or_term()), and_term()),
    );
    let expected = bitwise_xor(or_term(), and_term());
    assert_eq!(simplify(expr), expected);

    // c2 ^ ((c2 ^ OR) ^ (AND ^ c2))  -->  c2 ^ (OR ^ AND)
    let expr = bitwise_xor(
        c2(),
        bitwise_xor(
            bitwise_xor(c2(), or_term()),
            bitwise_xor(and_term(), c2()),
        ),
    );
    let expected = bitwise_xor(c2(), bitwise_xor(or_term(), and_term()));
    assert_eq!(simplify(expr), expected);

    // ((c2 ^ OR) ^ AND) ^ c2  -->  OR ^ AND
    let expr = bitwise_xor(
        bitwise_xor(bitwise_xor(c2(), or_term()), and_term()),
        c2(),
    );
    let expected = bitwise_xor(or_term(), and_term());
    assert_eq!(simplify(expr), expected);

    // ((c2 ^ OR) ^ (AND ^ c2)) ^ c2  -->  (OR ^ AND) ^ c2
    let expr = bitwise_xor(
        bitwise_xor(
            bitwise_xor(c2(), or_term()),
            bitwise_xor(and_term(), c2()),
        ),
        c2(),
    );
    let expected = bitwise_xor(bitwise_xor(or_term(), and_term()), c2());
    assert_eq!(simplify(expr), expected);
}
2898
#[test]
fn test_simplify_negated_bitwise_and() {
    // A negated column AND-ed with itself (either operand order) simplifies
    // to zero: `0u32` for `c4_non_null`, `0i64` for `c3_non_null`.
    let cases = [
        ((-col("c4_non_null")) & col("c4_non_null"), lit(0u32)),
        (col("c4_non_null") & (-col("c4_non_null")), lit(0u32)),
        ((-col("c3_non_null")) & col("c3_non_null"), lit(0i64)),
        (col("c3_non_null") & (-col("c3_non_null")), lit(0i64)),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
2923
#[test]
fn test_simplify_negated_bitwise_or() {
    // A negated column OR-ed with itself (either operand order) simplifies
    // to -1: `-1i32` for `c4_non_null`, `-1i64` for `c3_non_null`.
    let cases = [
        ((-col("c4_non_null")) | col("c4_non_null"), lit(-1i32)),
        (col("c4_non_null") | (-col("c4_non_null")), lit(-1i32)),
        ((-col("c3_non_null")) | col("c3_non_null"), lit(-1i64)),
        (col("c3_non_null") | (-col("c3_non_null")), lit(-1i64)),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
2950
#[test]
fn test_simplify_negated_bitwise_xor() {
    // A negated column XOR-ed with itself (either operand order) simplifies
    // to -1: `-1i32` for `c4_non_null`, `-1i64` for `c3_non_null`.
    let cases = [
        ((-col("c4_non_null")) ^ col("c4_non_null"), lit(-1i32)),
        (col("c4_non_null") ^ (-col("c4_non_null")), lit(-1i32)),
        ((-col("c3_non_null")) ^ col("c3_non_null"), lit(-1i64)),
        (col("c3_non_null") ^ (-col("c3_non_null")), lit(-1i64)),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
2977
#[test]
fn test_simplify_bitwise_and_or() {
    // (c2_non_null < 3) & ((c2_non_null < 3) | c1_non_null)  -->  c2_non_null < 3
    let predicate = || col("c2_non_null").lt(lit(3));
    let absorbing = bitwise_or(predicate(), col("c1_non_null"));
    let input = bitwise_and(predicate(), absorbing);

    assert_eq!(simplify(input), predicate());
}
2989
#[test]
fn test_simplify_bitwise_or_and() {
    // (c2_non_null < 3) | ((c2_non_null < 3) & c1_non_null)  -->  c2_non_null < 3
    let predicate = || col("c2_non_null").lt(lit(3));
    let absorbed = bitwise_and(predicate(), col("c1_non_null"));
    let input = bitwise_or(predicate(), absorbed);

    assert_eq!(simplify(input), predicate());
}
3001
#[test]
fn test_simplify_simple_bitwise_and() {
    // (c2 > 5) & (c2 > 5)  -->  c2 > 5
    let predicate = || col("c2").gt(lit(5));
    let input = predicate().bitand(predicate());

    assert_eq!(simplify(input), predicate());
}
3010
#[test]
fn test_simplify_simple_bitwise_or() {
    // (c2 > 5) | (c2 > 5)  -->  c2 > 5
    let predicate = || col("c2").gt(lit(5));
    let input = predicate().bitor(predicate());

    assert_eq!(simplify(input), predicate());
}
3019
#[test]
fn test_simplify_simple_bitwise_xor() {
    // A column XOR-ed with itself simplifies to zero: `0u32` for `c4`,
    // `0i64` for `c3`.
    let cases = [
        (col("c4").bitxor(col("c4")), lit(0u32)),
        (col("c3").bitxor(col("c3")), lit(0i64)),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
3034
#[test]
fn test_simplify_modulo_by_zero_non_null() {
    // `c2_non_null % 0` is expected to be left unchanged.
    let input = col("c2_non_null") % lit(0);
    let unchanged = input.clone();

    assert_eq!(simplify(input), unchanged);
}
3044
#[test]
fn test_simplify_simple_and() {
    // (c2 > 5) AND (c2 > 5)  -->  c2 > 5
    let predicate = || col("c2").gt(lit(5));
    let input = predicate().and(predicate());

    assert_eq!(simplify(input), predicate());
}
3053
#[test]
fn test_simplify_composed_and() {
    // ((c2 > 5) AND (c1 < 6)) AND (c2 > 5)  -->  (c2 > 5) AND (c1 < 6)
    let conjunction = || and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
    let input = and(conjunction(), col("c2").gt(lit(5)));

    assert_eq!(simplify(input), conjunction());
}
3065
#[test]
fn test_simplify_negated_and() {
    // (c2 > 5) AND NOT (c2 > 5)  -->  (c2 > 5) AND (c2 <= 5)
    let predicate = || col("c2").gt(lit(5));
    let input = and(predicate(), Expr::not(predicate()));
    let expected = predicate().and(col("c2").lt_eq(lit(5)));

    assert_eq!(simplify(input), expected);
}
3074
#[test]
fn test_simplify_or_and() {
    let single = col("c2").gt(lit(5));
    let conjunction = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
    let expected = single.clone();

    // (c2 > 5) OR ((c1 < 6) AND (c2 > 5))  -->  c2 > 5
    let forward = or(single.clone(), conjunction.clone());
    assert_eq!(simplify(forward), expected);

    // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)  -->  c2 > 5
    let reversed = or(conjunction, single);
    assert_eq!(simplify(reversed), expected);
}
3090
#[test]
fn test_simplify_or_and_non_null() {
    let l = col("c2_non_null").gt(lit(5));
    let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
    let expected = col("c2_non_null").gt(lit(5));

    // (c2 > 5) OR ((c1 < 6) AND (c2 > 5))  -->  c2 > 5
    let expr = or(l.clone(), r.clone());
    assert_eq!(simplify(expr), expected);

    // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)  -->  c2 > 5
    // The operands are swapped here so the reversed order is exercised too,
    // in line with `test_simplify_or_and`; previously this repeated `or(l, r)`.
    let expr = or(r, l);
    assert_eq!(simplify(expr), expected);
}
3109
#[test]
fn test_simplify_and_or() {
    let single = col("c2").gt(lit(5));
    let disjunction = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
    let expected = single.clone();

    // (c2 > 5) AND ((c1 < 6) OR (c2 > 5))  -->  c2 > 5
    let forward = and(single.clone(), disjunction.clone());
    assert_eq!(simplify(forward), expected);

    // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5)  -->  c2 > 5
    let reversed = and(disjunction, single);
    assert_eq!(simplify(reversed), expected);
}
3125
#[test]
fn test_simplify_and_or_non_null() {
    let l = col("c2_non_null").gt(lit(5));
    let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
    let expected = col("c2_non_null").gt(lit(5));

    // (c2 > 5) AND ((c1 < 6) OR (c2 > 5))  -->  c2 > 5
    let expr = and(l.clone(), r.clone());
    assert_eq!(simplify(expr), expected);

    // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5)  -->  c2 > 5
    // The operands are swapped here so the reversed order is exercised too,
    // in line with `test_simplify_and_or`; previously this repeated `and(l, r)`.
    let expr = and(r, l);
    assert_eq!(simplify(expr), expected);
}
3144
#[test]
fn test_simplify_by_de_morgan_laws() {
    // Each entry pairs an input with the form it is expected to simplify to.
    let cases = [
        // NOT (c3 AND c4)  -->  (NOT c3) OR (NOT c4)
        (
            and(col("c3"), col("c4")).not(),
            or(col("c3").not(), col("c4").not()),
        ),
        // NOT (c3 OR c4)  -->  (NOT c3) AND (NOT c4)
        (
            or(col("c3"), col("c4")).not(),
            and(col("c3").not(), col("c4").not()),
        ),
        // NOT (NOT c3)  -->  c3
        (col("c3").not().not(), col("c3")),
        // -(c3 & c4)  -->  (-c3) | (-c4)
        (
            -bitwise_and(col("c3"), col("c4")),
            bitwise_or(-col("c3"), -col("c4")),
        ),
        // -(c3 | c4)  -->  (-c3) & (-c4)
        (
            -bitwise_or(col("c3"), col("c4")),
            bitwise_and(-col("c3"), -col("c4")),
        ),
        // -(-c3)  -->  c3
        (-(-col("c3")), col("c3")),
    ];

    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }
}
3175
#[test]
fn test_simplify_null_and_false() {
    // NULL AND false --> false
    let input = lit_bool_null().and(lit(false));

    assert_eq!(simplify(input), lit(false));
}
3183
#[test]
fn test_simplify_divide_null_by_null() {
    // A typed (Int32) NULL literal; built fresh for each use.
    let null_i32 = || lit(ScalarValue::Int32(None));

    // NULL / NULL --> NULL
    let quotient = null_i32() / null_i32();

    assert_eq!(simplify(quotient), null_i32());
}
3192
#[test]
fn test_simplify_simplify_arithmetic_expr() {
    // 1 + 1 is folded to the literal 2.
    let sum = lit(1) + lit(1);
    let folded = simplify(sum);

    assert_eq!(folded, lit(2));
}
3199
#[test]
fn test_simplify_simplify_eq_expr() {
    // 1 = 1 is folded to the literal true.
    let comparison = binary_expr(lit(1), Operator::Eq, lit(1));
    let folded = simplify(comparison);

    assert_eq!(folded, lit(true));
}
3206
#[test]
fn test_simplify_regex() {
    // A malformed regex literal surfaces as a simplification error.
    assert_contains!(
        try_simplify(regex_match(col("c1"), lit("foo{")))
            .unwrap_err()
            .to_string(),
        "regex parse error"
    );

    // Patterns that must be left untouched, including ones that contain the
    // LIKE wildcard characters `%` and `_`.
    assert_no_change(regex_match(col("c1"), lit("foo.*")));
    assert_no_change(regex_match(col("c1"), lit("(foo)")));
    assert_no_change(regex_match(col("c1"), lit("%")));
    assert_no_change(regex_match(col("c1"), lit("_")));
    assert_no_change(regex_match(col("c1"), lit("f%o")));
    assert_no_change(regex_match(col("c1"), lit("^f%o")));
    assert_no_change(regex_match(col("c1"), lit("f_o")));

    // The empty pattern: every regex operator variant becomes a constant
    // guarded by a null check on the input column.
    assert_change(
        regex_match(col("c1"), lit("")),
        if_not_null(col("c1"), true),
    );
    assert_change(
        regex_not_match(col("c1"), lit("")),
        if_not_null(col("c1"), false),
    );
    assert_change(
        regex_imatch(col("c1"), lit("")),
        if_not_null(col("c1"), true),
    );
    assert_change(
        regex_not_imatch(col("c1"), lit("")),
        if_not_null(col("c1"), false),
    );

    // Unanchored single-character literal --> LIKE '%x%'
    assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));

    // Unanchored multi-character literal --> LIKE '%foo%'
    assert_change(
        regex_match(col("c1"), lit("foo")),
        col("c1").like(lit("%foo%")),
    );

    // Fully anchored literals --> equality / inequality
    assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
    assert_change(
        regex_not_match(col("c1"), lit("^$")),
        col("c1").not_eq(lit("")),
    );
    assert_change(
        regex_match(col("c1"), lit("^foo$")),
        col("c1").eq(lit("foo")),
    );
    assert_change(
        regex_not_match(col("c1"), lit("^foo$")),
        col("c1").not_eq(lit("foo")),
    );

    // Fully anchored alternations --> OR / AND chains of (in)equalities; with
    // four alternatives the result is an IN list instead.
    assert_change(
        regex_match(col("c1"), lit("^(foo|bar)$")),
        col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
    );
    assert_change(
        regex_not_match(col("c1"), lit("^(foo|bar)$")),
        col("c1")
            .not_eq(lit("foo"))
            .and(col("c1").not_eq(lit("bar"))),
    );
    assert_change(
        regex_match(col("c1"), lit("^(foo)$")),
        col("c1").eq(lit("foo")),
    );
    assert_change(
        regex_match(col("c1"), lit("^(foo|bar|baz)$")),
        ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
            .or(col("c1").eq(lit("baz"))),
    );
    assert_change(
        regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
        col("c1")
            .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
    );

    // Same shapes with `_` inside the literals. The result is an equality,
    // so the `_` is kept verbatim in the compared literal.
    // NOTE(review): an exact duplicate of the next assertion was removed;
    // a `regex_not_match` variant may have been intended in its place.
    assert_change(
        regex_match(col("c1"), lit("^(fo_o)$")),
        col("c1").eq(lit("fo_o")),
    );
    assert_change(
        regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
        col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
    );
    assert_change(
        regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
        col("c1")
            .not_eq(lit("fo_o"))
            .and(col("c1").not_eq(lit("ba_r"))),
    );
    assert_change(
        regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
        ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
            .or(col("c1").eq(lit("ba_z"))),
    );
    assert_change(
        regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
        col("c1").in_list(
            vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
            false,
        ),
    );

    // Groups, repetitions and misplaced anchors are left untouched.
    assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
    assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
    assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
    assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
    assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
    assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
    assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
    assert_no_change(regex_match(col("c1"), lit("^")));
    assert_no_change(regex_match(col("c1"), lit("$")));
    assert_no_change(regex_match(col("c1"), lit("$^")));
    assert_no_change(regex_match(col("c1"), lit("$foo^")));

    // One-sided anchors --> prefix / suffix LIKE patterns
    assert_change(
        regex_match(col("c1"), lit("^foo")),
        col("c1").like(lit("foo%")),
    );
    assert_change(
        regex_match(col("c1"), lit("foo$")),
        col("c1").like(lit("%foo")),
    );
    assert_change(
        regex_match(col("c1"), lit("^foo|bar$")),
        col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
    );

    // Unanchored alternations --> OR (or, negated, AND) of LIKE patterns
    assert_change(
        regex_match(col("c1"), lit("foo|bar|baz")),
        col("c1")
            .like(lit("%foo%"))
            .or(col("c1").like(lit("%bar%")))
            .or(col("c1").like(lit("%baz%"))),
    );
    assert_change(
        regex_match(col("c1"), lit("foo|x|baz")),
        col("c1")
            .like(lit("%foo%"))
            .or(col("c1").like(lit("%x%")))
            .or(col("c1").like(lit("%baz%"))),
    );
    assert_change(
        regex_not_match(col("c1"), lit("foo|bar|baz")),
        col("c1")
            .not_like(lit("%foo%"))
            .and(col("c1").not_like(lit("%bar%")))
            .and(col("c1").not_like(lit("%baz%"))),
    );
    // A fully anchored alternative inside the alternation becomes an
    // (in)equality while its neighbours stay LIKE patterns.
    assert_change(
        regex_match(col("c1"), lit("foo|^x$|baz")),
        col("c1")
            .like(lit("%foo%"))
            .or(col("c1").eq(lit("x")))
            .or(col("c1").like(lit("%baz%"))),
    );
    assert_change(
        regex_not_match(col("c1"), lit("foo|^bar$|baz")),
        col("c1")
            .not_like(lit("%foo%"))
            .and(col("c1").not_eq(lit("bar")))
            .and(col("c1").not_like(lit("%baz%"))),
    );
    // A six-way unanchored alternation is left as a regex.
    assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
}
3391
3392 #[track_caller]
3393 fn assert_no_change(expr: Expr) {
3394 let optimized = simplify(expr.clone());
3395 assert_eq!(expr, optimized);
3396 }
3397
3398 #[track_caller]
3399 fn assert_change(expr: Expr, expected: Expr) {
3400 let optimized = simplify(expr);
3401 assert_eq!(optimized, expected);
3402 }
3403
3404 fn regex_match(left: Expr, right: Expr) -> Expr {
3405 Expr::BinaryExpr(BinaryExpr {
3406 left: Box::new(left),
3407 op: Operator::RegexMatch,
3408 right: Box::new(right),
3409 })
3410 }
3411
3412 fn regex_not_match(left: Expr, right: Expr) -> Expr {
3413 Expr::BinaryExpr(BinaryExpr {
3414 left: Box::new(left),
3415 op: Operator::RegexNotMatch,
3416 right: Box::new(right),
3417 })
3418 }
3419
3420 fn regex_imatch(left: Expr, right: Expr) -> Expr {
3421 Expr::BinaryExpr(BinaryExpr {
3422 left: Box::new(left),
3423 op: Operator::RegexIMatch,
3424 right: Box::new(right),
3425 })
3426 }
3427
3428 fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3429 Expr::BinaryExpr(BinaryExpr {
3430 left: Box::new(left),
3431 op: Operator::RegexNotIMatch,
3432 right: Box::new(right),
3433 })
3434 }
3435
3436 fn try_simplify(expr: Expr) -> Result<Expr> {
3441 let schema = expr_test_schema();
3442 let execution_props = ExecutionProps::new();
3443 let simplifier = ExprSimplifier::new(
3444 SimplifyContext::new(&execution_props).with_schema(schema),
3445 );
3446 simplifier.simplify(expr)
3447 }
3448
3449 fn coerce(expr: Expr) -> Expr {
3450 let schema = expr_test_schema();
3451 let execution_props = ExecutionProps::new();
3452 let simplifier = ExprSimplifier::new(
3453 SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)),
3454 );
3455 simplifier.coerce(expr, schema.as_ref()).unwrap()
3456 }
3457
3458 fn simplify(expr: Expr) -> Expr {
3459 try_simplify(expr).unwrap()
3460 }
3461
3462 fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3463 let schema = expr_test_schema();
3464 let execution_props = ExecutionProps::new();
3465 let simplifier = ExprSimplifier::new(
3466 SimplifyContext::new(&execution_props).with_schema(schema),
3467 );
3468 let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3469 Ok((expr.data, count))
3470 }
3471
3472 fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3473 try_simplify_with_cycle_count(expr).unwrap()
3474 }
3475
3476 fn simplify_with_guarantee(
3477 expr: Expr,
3478 guarantees: Vec<(Expr, NullableInterval)>,
3479 ) -> Expr {
3480 let schema = expr_test_schema();
3481 let execution_props = ExecutionProps::new();
3482 let simplifier = ExprSimplifier::new(
3483 SimplifyContext::new(&execution_props).with_schema(schema),
3484 )
3485 .with_guarantees(guarantees);
3486 simplifier.simplify(expr).unwrap()
3487 }
3488
3489 fn expr_test_schema() -> DFSchemaRef {
3490 static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3491 Arc::new(
3492 DFSchema::from_unqualified_fields(
3493 vec![
3494 Field::new("c1", DataType::Utf8, true),
3495 Field::new("c2", DataType::Boolean, true),
3496 Field::new("c3", DataType::Int64, true),
3497 Field::new("c4", DataType::UInt32, true),
3498 Field::new("c1_non_null", DataType::Utf8, false),
3499 Field::new("c2_non_null", DataType::Boolean, false),
3500 Field::new("c3_non_null", DataType::Int64, false),
3501 Field::new("c4_non_null", DataType::UInt32, false),
3502 Field::new("c5", DataType::FixedSizeBinary(3), true),
3503 ]
3504 .into(),
3505 HashMap::new(),
3506 )
3507 .unwrap(),
3508 )
3509 });
3510 Arc::clone(&EXPR_TEST_SCHEMA)
3511 }
3512
#[test]
fn simplify_expr_null_comparison() {
    // A Boolean-typed NULL literal; built fresh for each use.
    let null_bool = || lit(ScalarValue::Boolean(None));

    // Every comparison against NULL is expected to fold to NULL.
    let comparisons = [
        // true = NULL
        lit(true).eq(null_bool()),
        // NULL != NULL
        null_bool().not_eq(null_bool()),
        // c2 != NULL
        col("c2").not_eq(null_bool()),
        // NULL = c2
        null_bool().eq(col("c2")),
    ];

    for comparison in comparisons {
        assert_eq!(simplify(comparison), null_bool());
    }
}
3541
#[test]
fn simplify_expr_is_not_null() {
    // Nullable column: `c1 IS NOT NULL` is kept as is.
    let nullable = col("c1").is_not_null();
    assert_eq!(simplify(nullable.clone()), nullable);

    // Non-nullable column: `c1_non_null IS NOT NULL` --> true
    let non_nullable = col("c1_non_null").is_not_null();
    assert_eq!(simplify(non_nullable), lit(true));
}
3555
#[test]
fn simplify_expr_is_null() {
    // Nullable column: `c1 IS NULL` is kept as is.
    let nullable = col("c1").is_null();
    assert_eq!(simplify(nullable.clone()), nullable);

    // Non-nullable column: `c1_non_null IS NULL` --> false
    let non_nullable = col("c1_non_null").is_null();
    assert_eq!(simplify(non_nullable), lit(false));
}
3569
#[test]
fn simplify_expr_is_unknown() {
    // Nullable column: `c2 IS UNKNOWN` is kept as is.
    let nullable = col("c2").is_unknown();
    assert_eq!(simplify(nullable.clone()), nullable);

    // Non-nullable column: `c2_non_null IS UNKNOWN` --> false
    let non_nullable = col("c2_non_null").is_unknown();
    assert_eq!(simplify(non_nullable), lit(false));
}
3577
#[test]
fn simplify_expr_is_not_known() {
    // Nullable column: `c2 IS NOT UNKNOWN` is kept as is.
    let nullable = col("c2").is_not_unknown();
    assert_eq!(simplify(nullable.clone()), nullable);

    // Non-nullable column: `c2_non_null IS NOT UNKNOWN` --> true
    let non_nullable = col("c2_non_null").is_not_unknown();
    assert_eq!(simplify(non_nullable), lit(true));
}
3588
#[test]
fn simplify_expr_eq() {
    // Precondition: the rewrites below rely on `c2` being Boolean.
    assert_eq!(
        col("c2").get_type(&expr_test_schema()).unwrap(),
        DataType::Boolean
    );

    // Each entry is `(input, expected simplification)`.
    let cases = [
        // true = true --> true
        (lit(true).eq(lit(true)), lit(true)),
        // true = false --> false
        (lit(true).eq(lit(false)), lit(false)),
        // c2 = true --> c2
        (col("c2").eq(lit(true)), col("c2")),
        // c2 = false --> NOT c2
        (col("c2").eq(lit(false)), col("c2").not()),
    ];

    for (input, want) in cases {
        assert_eq!(simplify(input), want);
    }
}
3606
#[test]
fn simplify_expr_eq_skip_nonboolean_type() {
    // Precondition: `c1` is a string column, not a Boolean one.
    assert_eq!(
        col("c1").get_type(&expr_test_schema()).unwrap(),
        DataType::Utf8
    );

    // c1 = 'foo' is expected to be left unchanged.
    let comparison = col("c1").eq(lit("foo"));
    assert_eq!(simplify(comparison.clone()), comparison);
}
3621
#[test]
fn simplify_expr_not_eq() {
    // Precondition: the rewrites below rely on `c2` being Boolean.
    assert_eq!(
        col("c2").get_type(&expr_test_schema()).unwrap(),
        DataType::Boolean
    );

    // Each entry is `(input, expected simplification)`.
    let cases = [
        // c2 != true --> NOT c2
        (col("c2").not_eq(lit(true)), col("c2").not()),
        // c2 != false --> c2
        (col("c2").not_eq(lit(false)), col("c2")),
        // true != true --> false
        (lit(true).not_eq(lit(true)), lit(false)),
        // true != false --> true
        (lit(true).not_eq(lit(false)), lit(true)),
    ];

    for (input, want) in cases {
        assert_eq!(simplify(input), want);
    }
}
3639
#[test]
fn simplify_expr_not_eq_skip_nonboolean_type() {
    // Precondition: `c1` is a string column, not a Boolean one.
    assert_eq!(
        col("c1").get_type(&expr_test_schema()).unwrap(),
        DataType::Utf8
    );

    // c1 != 'foo' is expected to be left unchanged.
    let comparison = col("c1").not_eq(lit("foo"));
    assert_eq!(simplify(comparison.clone()), comparison);
}
3654
// Checks how `CASE ... END = <literal>` and `CASE ... END != <literal>` are
// simplified when every THEN/ELSE value of the CASE is a literal.
#[test]
fn simplify_literal_case_equality() {
    // CASE WHEN c2_non_null != false THEN 'ok' ELSE 'not_ok' END
    let simple_case = Expr::Case(Case::new(
        None,
        vec![(
            Box::new(col("c2_non_null").not_eq(lit(false))),
            Box::new(lit("ok")),
        )],
        Some(Box::new(lit("not_ok"))),
    ));

    // simple_case = 'ok' --> c2_non_null
    assert_eq!(
        simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
        col("c2_non_null"),
    );

    // simple_case != 'ok' --> NOT c2_non_null
    assert_eq!(
        simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
        not(col("c2_non_null")),
    );

    // A six-branch CASE over nullable columns. The first two branches share
    // the THEN value 'pending'; the ELSE value is 'ready'.
    let complex_case = Expr::Case(Case::new(
        None,
        vec![
            (
                Box::new(col("c1").eq(lit("inboxed"))),
                Box::new(lit("pending")),
            ),
            (
                Box::new(col("c1").eq(lit("scheduled"))),
                Box::new(lit("pending")),
            ),
            (
                Box::new(col("c1").eq(lit("completed"))),
                Box::new(lit("completed")),
            ),
            (
                Box::new(col("c1").eq(lit("paused"))),
                Box::new(lit("paused")),
            ),
            (Box::new(col("c2")), Box::new(lit("running"))),
            (
                Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
                Box::new(lit("backing-off")),
            ),
        ],
        Some(Box::new(lit("ready"))),
    ));

    // complex_case = 'completed': the third branch's condition must be true
    // (IS NOT DISTINCT FROM true) and both earlier conditions must not be
    // true (IS DISTINCT FROM true).
    assert_eq!(
        simplify(binary_expr(
            complex_case.clone(),
            Operator::Eq,
            lit("completed"),
        )),
        not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
            distinct_from(col("c1").eq(lit("inboxed")), lit(true))
                .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
        )
    );

    // complex_case != 'completed': the negation of the expression above.
    assert_eq!(
        simplify(binary_expr(
            complex_case.clone(),
            Operator::NotEq,
            lit("completed"),
        )),
        distinct_from(col("c1").eq(lit("completed")), lit(true))
            .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
                .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
    );

    // complex_case = 'running': the fifth branch's condition (`c2`) must be
    // true and all four earlier conditions must not be true.
    assert_eq!(
        simplify(binary_expr(
            complex_case.clone(),
            Operator::Eq,
            lit("running"),
        )),
        not_distinct_from(col("c2"), lit(true)).and(
            distinct_from(col("c1").eq(lit("inboxed")), lit(true))
                .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
                .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
                .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
        )
    );

    // complex_case = 'ready' (the ELSE value): no branch condition may be
    // true.
    assert_eq!(
        simplify(binary_expr(
            complex_case.clone(),
            Operator::Eq,
            lit("ready"),
        )),
        distinct_from(col("c1").eq(lit("inboxed")), lit(true))
            .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
            .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
            .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
            .and(distinct_from(col("c2"), lit(true)))
            .and(distinct_from(
                col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
                lit(true)
            ))
    );

    // complex_case != 'ready': at least one branch condition must be true.
    assert_eq!(
        simplify(binary_expr(
            complex_case.clone(),
            Operator::NotEq,
            lit("ready"),
        )),
        not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
            .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
            .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
            .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
            .or(not_distinct_from(col("c2"), lit(true)))
            .or(not_distinct_from(
                col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
                lit(true)
            ))
    );
}
3790
// Checks the rewrite of searched CASE expressions whose THEN/ELSE values are
// Boolean into plain Boolean logic.
//
// Most cases call `simplify` twice; presumably a single pass does not reach
// the final form for these inputs -- TODO confirm.
#[test]
fn simplify_expr_case_when_then_else() {
    // CASE WHEN c2_non_null != false THEN 'ok' = 'not_ok'
    //      ELSE c2_non_null = true END
    // --> false
    assert_eq!(
        simplify(Expr::Case(Case::new(
            None,
            vec![(
                Box::new(col("c2_non_null").not_eq(lit(false))),
                Box::new(lit("ok").eq(lit("not_ok"))),
            )],
            Some(Box::new(col("c2_non_null").eq(lit(true)))),
        ))),
        lit(false)
    );

    // CASE WHEN c2_non_null != false THEN 'ok' = 'ok'
    //      ELSE c2_non_null = true END
    // --> c2_non_null
    assert_eq!(
        simplify(simplify(Expr::Case(Case::new(
            None,
            vec![(
                Box::new(col("c2_non_null").not_eq(lit(false))),
                Box::new(lit("ok").eq(lit("ok"))),
            )],
            Some(Box::new(col("c2_non_null").eq(lit(true)))),
        )))),
        col("c2_non_null")
    );

    // CASE WHEN c2 IS NULL THEN true ELSE c2 END
    // --> c2 IS NULL OR (c2 IS NOT NULL AND c2)
    assert_eq!(
        simplify(simplify(Expr::Case(Case::new(
            None,
            vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
            Some(Box::new(col("c2"))),
        )))),
        col("c2")
            .is_null()
            .or(col("c2").is_not_null().and(col("c2")))
    );

    // CASE WHEN c1_non_null THEN true WHEN c2_non_null THEN false ELSE true END
    // --> c1_non_null OR (NOT c1_non_null AND NOT c2_non_null)
    //
    // NOTE(review): this assertion used to appear twice verbatim; the copy
    // was removed. A variant with different THEN/ELSE literals may have been
    // intended in its place.
    assert_eq!(
        simplify(simplify(Expr::Case(Case::new(
            None,
            vec![
                (Box::new(col("c1_non_null")), Box::new(lit(true)),),
                (Box::new(col("c2_non_null")), Box::new(lit(false)),),
            ],
            Some(Box::new(lit(true))),
        )))),
        col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
    );

    // CASE WHEN c3 > 0 THEN true END (no ELSE branch)
    // --> (c3 > 0 IS NOT DISTINCT FROM true)
    //     OR ((c3 > 0 IS DISTINCT FROM true) AND NULL)
    assert_eq!(
        simplify(simplify(Expr::Case(Case::new(
            None,
            vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
            None,
        )))),
        not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
            col("c3").gt(lit(0_i64)),
            lit(true)
        )
        .and(lit_bool_null()))
    );

    // CASE WHEN c3 > 0 THEN true ELSE false END
    // --> c3 > 0 IS NOT DISTINCT FROM true
    assert_eq!(
        simplify(simplify(Expr::Case(Case::new(
            None,
            vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
            Some(Box::new(lit(false))),
        )))),
        not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
    );
}
3909
#[test]
fn simplify_expr_case_when_first_true() {
    // Local builders for a searched CASE (no base expression).
    let branch = |cond: Expr, value: Expr| (Box::new(cond), Box::new(value));
    let searched_case =
        |branches: Vec<(Box<Expr>, Box<Expr>)>, otherwise: Option<Expr>| {
            Expr::Case(Case::new(None, branches, otherwise.map(Box::new)))
        };

    // CASE WHEN true THEN 1 ELSE c1 END --> 1
    let input = searched_case(vec![branch(lit(true), lit(1))], Some(col("c1")));
    assert_eq!(simplify(input), lit(1));

    // CASE WHEN true THEN 'a' ELSE 'b' END --> 'a'
    let input = searched_case(vec![branch(lit(true), lit("a"))], Some(lit("b")));
    assert_eq!(simplify(input), lit("a"));

    // CASE WHEN true THEN 'a' WHEN 'x' > 5 THEN 'b' ELSE 'c' END --> 'a'
    let input = searched_case(
        vec![
            branch(lit(true), lit("a")),
            branch(lit("x").gt(lit(5)), lit("b")),
        ],
        Some(lit("c")),
    );
    assert_eq!(simplify(input), lit("a"));

    // CASE WHEN true THEN 'a' END --> 'a'
    let input = searched_case(vec![branch(lit(true), lit("a"))], None);
    assert_eq!(simplify(input), lit("a"));

    // CASE WHEN c2 THEN 1 ELSE 2 END is left unchanged.
    let input = searched_case(vec![branch(col("c2"), lit(1))], Some(lit(2)));
    assert_eq!(simplify(input.clone()), input);

    // CASE WHEN false THEN 1 ELSE 2 END must not become the THEN value.
    let input = searched_case(vec![branch(lit(false), lit(1))], Some(lit(2)));
    assert_ne!(simplify(input), lit(1));

    // CASE WHEN c1 > 5 THEN 1 ELSE 2 END is left unchanged.
    let input = searched_case(
        vec![branch(col("c1").gt(lit(5)), lit(1))],
        Some(lit(2)),
    );
    assert_eq!(simplify(input.clone()), input);
}
3979
#[test]
fn simplify_expr_case_when_any_true() {
    // Local builders for a searched CASE (no base expression).
    let branch = |cond: Expr, value: Expr| (Box::new(cond), Box::new(value));
    let searched_case =
        |branches: Vec<(Box<Expr>, Box<Expr>)>, otherwise: Option<Expr>| {
            Expr::Case(Case::new(None, branches, otherwise.map(Box::new)))
        };

    // CASE WHEN c3 > 0 THEN 'a' WHEN true THEN 'b' ELSE 'c' END
    // --> CASE WHEN c3 > 0 THEN 'a' ELSE 'b' END
    let input = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), lit("a")),
            branch(lit(true), lit("b")),
        ],
        Some(lit("c")),
    );
    let want = searched_case(
        vec![branch(col("c3").gt(lit(0)), lit("a"))],
        Some(lit("b")),
    );
    assert_eq!(simplify(input), want);

    // A literal `true` condition in the middle: its value becomes the ELSE
    // and every later branch is dropped.
    let input = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), lit("a")),
            branch(col("c4").lt(lit(0)), lit("b")),
            branch(lit(true), lit("c")),
            branch(col("c3").eq(lit(0)), lit("d")),
        ],
        Some(lit("e")),
    );
    let want = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), lit("a")),
            branch(col("c4").lt(lit(0)), lit("b")),
        ],
        Some(lit("c")),
    );
    assert_eq!(simplify(input), want);

    // A trailing literal `true` condition without an ELSE: its value becomes
    // the ELSE.
    let input = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), lit(1)),
            branch(col("c4").lt(lit(0)), lit(2)),
            branch(lit(true), lit(3)),
        ],
        None,
    );
    let want = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), lit(1)),
            branch(col("c4").lt(lit(0)), lit(2)),
        ],
        Some(lit(3)),
    );
    assert_eq!(simplify(input), want);

    // No literal `true` condition: the CASE is left unchanged.
    let input = searched_case(
        vec![
            branch(col("c3").gt(lit(0)), col("c3")),
            branch(col("c4").lt(lit(0)), lit(2)),
        ],
        Some(lit(3)),
    );
    assert_eq!(simplify(input.clone()), input);
}
4055
#[test]
fn simplify_expr_case_when_any_false() {
    // Local builders for a searched CASE (no base expression).
    let branch = |cond: Expr, value: Expr| (Box::new(cond), Box::new(value));
    let searched_case =
        |branches: Vec<(Box<Expr>, Box<Expr>)>, otherwise: Option<Expr>| {
            Expr::Case(Case::new(None, branches, otherwise.map(Box::new)))
        };

    // CASE WHEN false THEN 'a' END --> NULL (typed as Utf8)
    let input = searched_case(vec![branch(lit(false), lit("a"))], None);
    assert_eq!(simplify(input), lit(ScalarValue::Utf8(None)));

    // CASE WHEN false THEN 2 ELSE 1 END --> 1
    let input = searched_case(vec![branch(lit(false), lit(2))], Some(lit(1)));
    assert_eq!(simplify(input), lit(1),);

    // CASE WHEN c3 < 10 THEN 'b' WHEN false THEN c3 ELSE c4 END
    // --> CASE WHEN c3 < 10 THEN 'b' ELSE c4 END
    let input = searched_case(
        vec![
            branch(col("c3").lt(lit(10)), lit("b")),
            branch(lit(false), col("c3")),
        ],
        Some(col("c4")),
    );
    let want = searched_case(
        vec![branch(col("c3").lt(lit(10)), lit("b"))],
        Some(col("c4")),
    );
    assert_eq!(simplify(input), want);

    // No literal `false` condition: the CASE is left unchanged.
    let input = searched_case(
        vec![branch(col("c3").eq(lit(4)), lit(1))],
        Some(lit(2)),
    );
    assert_eq!(simplify(input.clone()), input);
}
4103
4104 fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4105 Expr::BinaryExpr(BinaryExpr {
4106 left: Box::new(left.into()),
4107 op: Operator::IsDistinctFrom,
4108 right: Box::new(right.into()),
4109 })
4110 }
4111
4112 fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4113 Expr::BinaryExpr(BinaryExpr {
4114 left: Box::new(left.into()),
4115 op: Operator::IsNotDistinctFrom,
4116 right: Box::new(right.into()),
4117 })
4118 }
4119
#[test]
fn simplify_expr_bool_or() {
    // Each entry is `(input, expected simplification)`.
    let cases = [
        // c2 OR true --> true
        (col("c2").or(lit(true)), lit(true)),
        // c2 OR false --> c2
        (col("c2").or(lit(false)), col("c2")),
        // true OR NULL --> true
        (lit(true).or(lit_bool_null()), lit(true)),
        // NULL OR true --> true
        (lit_bool_null().or(lit(true)), lit(true)),
        // false OR NULL --> NULL
        (lit(false).or(lit_bool_null()), lit_bool_null()),
        // NULL OR false --> NULL
        (lit_bool_null().or(lit(false)), lit_bool_null()),
    ];
    for (input, want) in cases {
        assert_eq!(simplify(input), want);
    }

    // (c1 BETWEEN 0 AND 10) OR NULL: the BETWEEN is expanded into a pair of
    // comparisons, while the `OR NULL` is kept.
    let input = col("c1").between(lit(0), lit(10)).or(lit_bool_null());
    let actual = simplify(input);

    let range_check = and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10)));
    let want = or(range_check, lit_bool_null());
    assert_eq!(want, actual);
}
4153
// Exercises simplification of IN / NOT IN lists: empty and NULL inputs,
// expansion of short lists into (in)equalities, subquery lists, and the
// combination of several lists over the same column with AND / OR.
#[test]
fn simplify_inlist() {
    // c1 IN () --> false; c1 NOT IN () --> true
    assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
    assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));

    // NULL IN (c1, 1) --> NULL
    assert_eq!(
        simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
        lit_bool_null()
    );

    // NULL NOT IN (c1, 1) --> NULL
    assert_eq!(
        simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
        lit_bool_null()
    );

    // Single-element lists become a plain (in)equality.
    assert_eq!(
        simplify(in_list(col("c1"), vec![lit(1)], false)),
        col("c1").eq(lit(1))
    );
    assert_eq!(
        simplify(in_list(col("c1"), vec![lit(1)], true)),
        col("c1").not_eq(lit(1))
    );

    // The tested expression may be compound:
    // (c1 * 10) IN (2) --> (c1 * 10) = 2
    assert_eq!(
        simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
        (col("c1") * lit(10)).eq(lit(2))
    );

    // Two-element lists: IN becomes an OR of equalities, NOT IN an AND of
    // inequalities.
    assert_eq!(
        simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
        col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
    );
    assert_eq!(
        simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
        col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
    );

    // A list holding a single scalar subquery becomes IN / NOT IN subquery.
    let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
    assert_eq!(
        simplify(in_list(
            col("c1"),
            vec![scalar_subquery(Arc::clone(&subquery))],
            false
        )),
        in_subquery(col("c1"), Arc::clone(&subquery))
    );
    assert_eq!(
        simplify(in_list(
            col("c1"),
            vec![scalar_subquery(Arc::clone(&subquery))],
            true
        )),
        not_in_subquery(col("c1"), subquery)
    );

    let subquery1 =
        scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
    let subquery2 =
        scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));

    // c1 NOT IN (<subquery1>, <subquery2>)
    // --> c1 != <subquery1> AND c1 != <subquery2>
    assert_eq!(
        simplify(in_list(
            col("c1"),
            vec![subquery1.clone(), subquery2.clone()],
            true
        )),
        col("c1")
            .not_eq(subquery1.clone())
            .and(col("c1").not_eq(subquery2.clone()))
    );

    // c1 IN (<subquery1>, <subquery2>)
    // --> c1 = <subquery1> OR c1 = <subquery2>
    assert_eq!(
        simplify(in_list(
            col("c1"),
            vec![subquery1.clone(), subquery2.clone()],
            false
        )),
        col("c1").eq(subquery1).or(col("c1").eq(subquery2))
    );

    // c1 IN (1,2,3,4) AND c1 IN (5,6,7,8) --> false (disjoint lists)
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
        in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
    );
    assert_eq!(simplify(expr), lit(false));

    // c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) --> c1 = 4 (intersection)
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
        in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
    );
    assert_eq!(simplify(expr), col("c1").eq(lit(4)));

    // c1 NOT IN (1,2,3,4) OR c1 NOT IN (5,6,7,8) --> true
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
        in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
    );
    assert_eq!(simplify(expr), lit(true));

    // c1 NOT IN (1,2,3,4) OR c1 NOT IN (4,5,6,7) --> c1 != 4
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
        in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
    );
    assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));

    // c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7)
    // --> c1 NOT IN (1,2,3,4,5,6,7) (union)
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
        in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
    );
    assert_eq!(
        simplify(expr),
        in_list(
            col("c1"),
            vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
            true
        )
    );

    // c1 IN (1,2,3,4) OR c1 IN (2,3,4,5) --> c1 IN (1,2,3,4,5) (union)
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
        in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
    );
    assert_eq!(
        simplify(expr),
        in_list(
            col("c1"),
            vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
            false
        )
    );

    // c1 IN (1,2,3) AND c1 NOT IN (1,2,3,4,5) --> false
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
        col("c1"),
        vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
        true,
    ));
    assert_eq!(simplify(expr), lit(false));

    // c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) --> c1 = 5
    let expr =
        in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
            col("c1"),
            vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
            false,
        ));
    assert_eq!(simplify(expr), col("c1").eq(lit(5)));

    // c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) --> c1 IN (1,2,3,4)
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
        in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
    );
    assert_eq!(
        simplify(expr),
        in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
    );

    // Three IN lists ANDed together: the common elements are 3 and 6, and
    // the two-element result is expanded into an OR of equalities.
    let expr = in_list(
        col("c1"),
        vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
        false,
    )
    .and(in_list(
        col("c1"),
        vec![lit(1), lit(3), lit(5), lit(6)],
        false,
    ))
    .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
    assert_eq!(
        simplify(expr),
        col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
    );

    // A mix of IN and NOT IN lists ANDed together: only 8 satisfies all of
    // them.
    let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
        in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
            .and(in_list(
                col("c1"),
                vec![lit(3), lit(4), lit(5), lit(6)],
                true,
            ))
            .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
    );
    assert_eq!(simplify(expr), col("c1").eq(lit(8)));

    // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) is expected to
    // be left unchanged.
    let expr =
        in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
            .not_eq(lit(5))
            .or(in_list(
                col("c1"),
                vec![lit(6), lit(7), lit(8), lit(9)],
                true,
            )));
    assert_eq!(simplify(expr.clone()), expr);
}
4362
#[test]
fn simplify_null_in_empty_inlist() {
    // An untyped NULL literal; built fresh for each use.
    let untyped_null = || lit(ScalarValue::Null);

    // Each entry is `(input, expected simplification)`: an empty list decides
    // the result even when the tested value is NULL.
    let cases = [
        // Boolean NULL IN () --> false
        (in_list(lit_bool_null(), vec![], false), lit(false)),
        // Boolean NULL NOT IN () --> true
        (in_list(lit_bool_null(), vec![], true), lit(true)),
        // untyped NULL IN () --> false
        (in_list(untyped_null(), vec![], false), lit(false)),
        // untyped NULL NOT IN () --> true
        (in_list(untyped_null(), vec![], true), lit(true)),
    ];

    for (input, want) in cases {
        assert_eq!(simplify(input), want);
    }
}
4382
#[test]
fn just_simplifier_simplify_null_in_empty_inlist() {
    // Applies only the `Simplifier` rewriter to an expression, instead of
    // going through `ExprSimplifier::simplify`.
    let run_simplifier = |expr: Expr| -> Expr {
        let props = ExecutionProps::new();
        let info = SimplifyContext::new(&props).with_schema(expr_test_schema());
        let mut rewriter = Simplifier::new(&info);
        expr.rewrite(&mut rewriter)
            .expect("Failed to simplify expression")
            .data
    };

    // An untyped NULL literal; built fresh for each use.
    let untyped_null = || lit(ScalarValue::Null);

    // Each entry is `(input, expected simplification)`.
    let cases = [
        // Boolean NULL IN () --> false
        (in_list(lit_bool_null(), vec![], false), lit(false)),
        // Boolean NULL NOT IN () --> true
        (in_list(lit_bool_null(), vec![], true), lit(true)),
        // untyped NULL IN () --> false
        (in_list(untyped_null(), vec![], false), lit(false)),
        // untyped NULL NOT IN () --> true
        (in_list(untyped_null(), vec![], true), lit(true)),
    ];

    for (input, want) in cases {
        assert_eq!(run_simplifier(input), want);
    }
}
4412
#[test]
fn simplify_large_or() {
    // Build `false OR c1 = 0 OR c1 = 1 OR ... OR c1 = 4`, left-nested.
    let mut disjunction = lit(false);
    for value in 0..5 {
        disjunction = disjunction.or(col("c1").eq(lit(value)));
    }

    // The chain is expected to collapse into `c1 IN (0, 1, 2, 3, 4)`.
    let members: Vec<Expr> = (0..5).map(lit).collect();
    assert_eq!(simplify(disjunction), in_list(col("c1"), members, false),);
}
4423
#[test]
fn simplify_expr_bool_and() {
    // (input, expected simplification) pairs for AND with boolean literals.
    let cases = [
        // col AND true --> col
        (col("c2").and(lit(true)), col("c2")),
        // col AND false --> false
        (col("c2").and(lit(false)), lit(false)),
        // true AND NULL --> NULL (in either operand order)
        (lit(true).and(lit_bool_null()), lit_bool_null()),
        (lit_bool_null().and(lit(true)), lit_bool_null()),
        // false AND NULL --> false (in either operand order)
        (lit(false).and(lit_bool_null()), lit(false)),
        (lit_bool_null().and(lit(false)), lit(false)),
    ];
    for (input, expected) in cases {
        assert_eq!(simplify(input), expected);
    }

    // (c1 BETWEEN 0 AND 10) AND NULL: the BETWEEN is expanded into a pair of
    // comparisons, but the conjunction with NULL is kept as-is.
    let input = col("c1").between(lit(0), lit(10)).and(lit_bool_null());
    let result = simplify(input);

    let expected_expr = col("c1")
        .gt_eq(lit(0))
        .and(col("c1").lt_eq(lit(10)))
        .and(lit_bool_null());
    assert_eq!(expected_expr, result);
}
4456
#[test]
fn simplify_expr_between() {
    let c2 = || col("c2");

    // c2 BETWEEN 3 AND 4 --> c2 >= 3 AND c2 <= 4
    let inclusive = c2().gt_eq(lit(3)).and(c2().lt_eq(lit(4)));
    assert_eq!(simplify(c2().between(lit(3), lit(4))), inclusive);

    // c2 NOT BETWEEN 3 AND 4 --> c2 < 3 OR c2 > 4
    let exclusive = c2().lt(lit(3)).or(c2().gt(lit(4)));
    assert_eq!(simplify(c2().not_between(lit(3), lit(4))), exclusive);
}
4473
#[test]
fn test_like_and_ilike() {
    let null = lit(ScalarValue::Utf8(None));

    // Builds `subject <op> pattern` for LIKE, NOT LIKE, ILIKE and NOT ILIKE,
    // in that order.
    let like_family = |subject: Expr, pattern: Expr| -> [Expr; 4] {
        [
            subject.clone().like(pattern.clone()),
            subject.clone().not_like(pattern.clone()),
            subject.clone().ilike(pattern.clone()),
            subject.not_ilike(pattern),
        ]
    };
    // Whether each entry of `like_family` is the positive (non-negated) form.
    let positive_form = [true, false, true, false];

    // expr [NOT] [I]LIKE NULL --> NULL
    for candidate in like_family(col("c1"), null.clone()) {
        assert_eq!(simplify(candidate), lit_bool_null());
    }

    // Nullable column against '%' / '%%': the result is expressed through the
    // `if_not_null` CASE expression on the column.
    for pattern in ["%", "%%"] {
        let candidates = like_family(col("c1"), lit(pattern));
        for (candidate, positive) in candidates.into_iter().zip(positive_form) {
            assert_eq!(simplify(candidate), if_not_null(col("c1"), positive));
        }
    }

    // Column `c1_non_null` against '%' / '%%': folds to a plain boolean literal.
    for pattern in ["%", "%%"] {
        let candidates = like_family(col("c1_non_null"), lit(pattern));
        for (candidate, positive) in candidates.into_iter().zip(positive_form) {
            assert_eq!(simplify(candidate), lit(positive));
        }
    }

    // NULL [NOT] [I]LIKE <pattern> --> NULL, whatever the pattern is.
    for pattern in ["%", "%%", "a%"] {
        for candidate in like_family(null.clone(), lit(pattern)) {
            assert_eq!(simplify(candidate), lit_bool_null());
        }
    }

    // LIKE with a wildcard-free pattern becomes an (in)equality ...
    assert_eq!(simplify(col("c1").like(lit("a"))), col("c1").eq(lit("a")));
    assert_eq!(
        simplify(col("c1").not_like(lit("a"))),
        col("c1").not_eq(lit("a"))
    );
    // ... while a pattern containing `_` is left untouched.
    let underscore_like = col("c1").like(lit("a_"));
    assert_eq!(simplify(underscore_like.clone()), underscore_like);
    let underscore_not_like = col("c1").not_like(lit("a_"));
    assert_eq!(simplify(underscore_not_like.clone()), underscore_not_like);

    // ILIKE with a wildcard-free pattern is not rewritten into an equality.
    let plain_ilike = col("c1").ilike(lit("a"));
    assert_eq!(simplify(plain_ilike.clone()), plain_ilike);
    let plain_not_ilike = col("c1").not_ilike(lit("a"));
    assert_eq!(simplify(plain_not_ilike.clone()), plain_not_ilike);
}
4597
#[test]
fn test_simplify_with_guarantee() {
    // Expression under test:
    //   (c3 > 3) AND ((c4 + 2 < 10) OR (c1 NOT IN ('a', 'b')))
    let c3_gt_3 = col("c3").gt(lit(3_i64));
    let c4_sum_lt_10 = (col("c4") + lit(2_u32)).lt(lit(10_u32));
    let c1_not_in_ab = col("c1").in_list(vec![lit("a"), lit("b")], true);
    let expr = c3_gt_3.clone().and(c4_sum_lt_10.or(c1_not_in_ab));

    // Guarantee derived from a single scalar value.
    let exactly = |value: ScalarValue| NullableInterval::from(value);
    // The closed Int64 range [0, 2], used for `c3` in two scenarios.
    let zero_to_two = || Interval::make(Some(0_i64), Some(2_i64)).unwrap();

    // Scenario 1: every column is described by a NULL scalar; the whole
    // expression is expected to become NULL.
    let all_null = vec![
        (col("c3"), exactly(ScalarValue::Int64(None))),
        (col("c4"), exactly(ScalarValue::UInt32(None))),
        (col("c1"), exactly(ScalarValue::Utf8(None))),
    ];
    assert_eq!(
        simplify_with_guarantee(expr.clone(), all_null),
        lit_bool_null()
    );

    // Scenario 2: c3 is non-null within [0, 2], c4 is 9 and c1 is 'a';
    // the expression is expected to become false.
    let non_null_values = vec![
        (
            col("c3"),
            NullableInterval::NotNull {
                values: zero_to_two(),
            },
        ),
        (col("c4"), exactly(ScalarValue::UInt32(Some(9)))),
        (col("c1"), exactly(ScalarValue::from("a"))),
    ];
    assert_eq!(
        simplify_with_guarantee(expr.clone(), non_null_values),
        lit(false)
    );

    // Scenario 3: c3 and c4 may be NULL, c1 is non-null within ['d', 'f'];
    // only `c3 > 3` is expected to remain.
    let maybe_null_values = vec![
        (
            col("c3"),
            NullableInterval::MaybeNull {
                values: zero_to_two(),
            },
        ),
        (
            col("c4"),
            NullableInterval::MaybeNull {
                values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
            },
        ),
        (
            col("c1"),
            NullableInterval::NotNull {
                values: Interval::try_new(ScalarValue::from("d"), ScalarValue::from("f"))
                    .unwrap(),
            },
        ),
    ];
    let remaining = simplify_with_guarantee(expr.clone(), maybe_null_values);
    assert_eq!(&remaining, &c3_gt_3);

    // Scenario 4: c3 is 9 and c4 is 3 (no guarantee for c1);
    // the expression is expected to become true.
    let known_c3_and_c4 = vec![
        (col("c3"), exactly(ScalarValue::Int64(Some(9)))),
        (col("c4"), exactly(ScalarValue::UInt32(Some(3)))),
    ];
    assert_eq!(
        simplify_with_guarantee(expr.clone(), known_c3_and_c4),
        lit(true)
    );

    // Scenario 5: only c4 is known (3); `c3 > 3` is expected to remain.
    let known_c4 = vec![(col("c4"), exactly(ScalarValue::UInt32(Some(3))))];
    let remaining = simplify_with_guarantee(expr, known_c4);
    assert_eq!(&remaining, &c3_gt_3);
}
4683
#[test]
fn test_expression_partial_simplify_1() {
    // (1 + 2) + (4 / 0): the left operand is folded to 3 while the
    // division by zero is expected to be left in place.
    let division_by_zero = || lit(4) / lit(0);
    let input = (lit(1) + lit(2)) + division_by_zero();

    assert_eq!(simplify(input), lit(3) + division_by_zero());
}
4692
#[test]
fn test_expression_partial_simplify_2() {
    // (1 > 2) AND (4 / 0): expected to simplify to false even though the
    // right operand is a division by zero.
    let always_false = lit(1).gt(lit(2));
    let input = always_false.and(lit(4) / lit(0));

    assert_eq!(simplify(input), lit(false));
}
4701
#[test]
fn test_simplify_cycles() {
    // ((c4 - 10) + 10) * 100 / 100 is expected to come back unchanged.
    let unchanged = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);

    // (input, expected output, expected count from `simplify_with_cycle_count`)
    let cases = [
        // true --> true
        (lit(true), lit(true), 1),
        // (true != NULL) OR (5 > 10) --> NULL
        (
            lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10))),
            lit_bool_null(),
            2,
        ),
        (unchanged.clone(), unchanged, 1),
        // ((c4 < 1 OR c3 < 2) AND c3_non_null < 3) AND false --> false
        (
            col("c4")
                .lt(lit(1))
                .or(col("c3").lt(lit(2)))
                .and(col("c3_non_null").lt(lit(3)))
                .and(lit(false)),
            lit(false),
            2,
        ),
    ];

    for (input, expected, expected_cycles) in cases {
        let (simplified, cycles) = simplify_with_cycle_count(input);
        assert_eq!(simplified, expected);
        assert_eq!(cycles, expected_cycles);
    }
}
4737
4738 fn boolean_test_schema() -> DFSchemaRef {
4739 static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4740 Schema::new(vec![
4741 Field::new("A", DataType::Boolean, false),
4742 Field::new("B", DataType::Boolean, false),
4743 Field::new("C", DataType::Boolean, false),
4744 Field::new("D", DataType::Boolean, false),
4745 ])
4746 .to_dfschema_ref()
4747 .unwrap()
4748 });
4749 Arc::clone(&BOOLEAN_TEST_SCHEMA)
4750 }
4751
#[test]
fn simplify_common_factor_conjunction_in_disjunction() {
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(boolean_test_schema());
    let simplifier = ExprSimplifier::new(context);

    let a = || col("A");
    let b = || col("B");
    let c = || col("C");
    let d = || col("D");

    // (input, expected) pairs
    let cases = [
        // (A AND B) OR (A AND C) --> A AND (B OR C)
        (a().and(b()).or(a().and(c())), a().and(b().or(c()))),
        // (A AND B) OR (A AND C) OR (A AND D) --> A AND (B OR C OR D)
        (
            a().and(b()).or(a().and(c())).or(a().and(d())),
            a().and(b().or(c()).or(d())),
        ),
        // A OR (B AND (C AND A)) --> A
        (a().or(b().and(c().and(a()))), a()),
    ];

    for (input, expected) in cases {
        assert_eq!(expected, simplifier.simplify(input).unwrap());
    }
}
4780
#[test]
fn test_simplify_udaf() {
    // Wraps a mock UDAF into an argument-less aggregate function expression.
    let aggregate_call = |mock: SimplifyMockUdaf| -> Expr {
        let udaf = AggregateUDF::new_from_impl(mock);
        Expr::AggregateFunction(expr::AggregateFunction::new_udf(
            udaf.into(),
            vec![],
            false,
            None,
            vec![],
            None,
        ))
    };

    // With a simplification hook, the call is replaced by the hook's output.
    let simplifiable = aggregate_call(SimplifyMockUdaf::new_with_simplify());
    assert_eq!(simplify(simplifiable), col("result_column"));

    // Without a hook, the call is left untouched.
    let untouched = aggregate_call(SimplifyMockUdaf::new_without_simplify());
    assert_eq!(simplify(untouched.clone()), untouched);
}
4811
/// Mock aggregate UDF for the simplification tests.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct SimplifyMockUdaf {
    /// When `true`, the `AggregateUDFImpl::simplify` hook returns a rewrite
    /// closure; when `false`, it returns `None`.
    simplify: bool,
}
4818
4819 impl SimplifyMockUdaf {
4820 fn new_with_simplify() -> Self {
4822 Self { simplify: true }
4823 }
4824 fn new_without_simplify() -> Self {
4826 Self { simplify: false }
4827 }
4828 }
4829
4830 impl AggregateUDFImpl for SimplifyMockUdaf {
4831 fn as_any(&self) -> &dyn std::any::Any {
4832 self
4833 }
4834
4835 fn name(&self) -> &str {
4836 "mock_simplify"
4837 }
4838
4839 fn signature(&self) -> &Signature {
4840 unimplemented!()
4841 }
4842
4843 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4844 unimplemented!("not needed for tests")
4845 }
4846
4847 fn accumulator(
4848 &self,
4849 _acc_args: AccumulatorArgs,
4850 ) -> Result<Box<dyn Accumulator>> {
4851 unimplemented!("not needed for tests")
4852 }
4853
4854 fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4855 unimplemented!("not needed for testing")
4856 }
4857
4858 fn create_groups_accumulator(
4859 &self,
4860 _args: AccumulatorArgs,
4861 ) -> Result<Box<dyn GroupsAccumulator>> {
4862 unimplemented!("not needed for testing")
4863 }
4864
4865 fn simplify(&self) -> Option<AggregateFunctionSimplification> {
4866 if self.simplify {
4867 Some(Box::new(|_, _| Ok(col("result_column"))))
4868 } else {
4869 None
4870 }
4871 }
4872 }
4873
#[test]
fn test_simplify_udwf() {
    // Wraps a mock UDWF into an argument-less window function expression.
    let window_call = |mock: SimplifyMockUdwf| -> Expr {
        let definition =
            WindowFunctionDefinition::WindowUDF(WindowUDF::new_from_impl(mock).into());
        Expr::from(WindowFunction::new(definition, vec![]))
    };

    // With a simplification hook, the call is replaced by the hook's output.
    let simplifiable = window_call(SimplifyMockUdwf::new_with_simplify());
    assert_eq!(simplify(simplifiable), col("result_column"));

    // Without a hook, the call is left untouched.
    let untouched = window_call(SimplifyMockUdwf::new_without_simplify());
    assert_eq!(simplify(untouched.clone()), untouched);
}
4892
/// Mock window UDF for the simplification tests.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct SimplifyMockUdwf {
    /// When `true`, the `WindowUDFImpl::simplify` hook returns a rewrite
    /// closure; when `false`, it returns `None`.
    simplify: bool,
}
4899
4900 impl SimplifyMockUdwf {
4901 fn new_with_simplify() -> Self {
4903 Self { simplify: true }
4904 }
4905 fn new_without_simplify() -> Self {
4907 Self { simplify: false }
4908 }
4909 }
4910
4911 impl WindowUDFImpl for SimplifyMockUdwf {
4912 fn as_any(&self) -> &dyn std::any::Any {
4913 self
4914 }
4915
4916 fn name(&self) -> &str {
4917 "mock_simplify"
4918 }
4919
4920 fn signature(&self) -> &Signature {
4921 unimplemented!()
4922 }
4923
4924 fn simplify(&self) -> Option<WindowFunctionSimplification> {
4925 if self.simplify {
4926 Some(Box::new(|_, _| Ok(col("result_column"))))
4927 } else {
4928 None
4929 }
4930 }
4931
4932 fn partition_evaluator(
4933 &self,
4934 _partition_evaluator_args: PartitionEvaluatorArgs,
4935 ) -> Result<Box<dyn PartitionEvaluator>> {
4936 unimplemented!("not needed for tests")
4937 }
4938
4939 fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
4940 unimplemented!("not needed for tests")
4941 }
4942
4943 fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
4944 LimitEffect::Unknown
4945 }
4946 }
4947 #[derive(Debug, PartialEq, Eq, Hash)]
4948 struct VolatileUdf {
4949 signature: Signature,
4950 }
4951
4952 impl VolatileUdf {
4953 pub fn new() -> Self {
4954 Self {
4955 signature: Signature::exact(vec![], Volatility::Volatile),
4956 }
4957 }
4958 }
4959 impl ScalarUDFImpl for VolatileUdf {
4960 fn as_any(&self) -> &dyn std::any::Any {
4961 self
4962 }
4963
4964 fn name(&self) -> &str {
4965 "VolatileUdf"
4966 }
4967
4968 fn signature(&self) -> &Signature {
4969 &self.signature
4970 }
4971
4972 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4973 Ok(DataType::Int16)
4974 }
4975
4976 fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
4977 panic!("dummy - not implemented")
4978 }
4979 }
4980
#[test]
fn test_optimize_volatile_conditions() {
    let udf = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
    let rand = Expr::ScalarFunction(ScalarFunction::new_udf(udf, vec![]));

    // Building blocks: `VolatileUdf() = 0` and `column1 = 2`.
    let rand_is_zero = || rand.clone().eq(lit(0));
    let column1_is_two = || col("column1").eq(lit(2));

    // rand = 0 OR (column1 = 2 AND rand = 0): expected to stay as written;
    // the two volatile calls are not merged.
    let expr = rand_is_zero().or(column1_is_two().and(rand_is_zero()));
    assert_eq!(simplify(expr.clone()), expr);

    // column1 = 2 OR (column1 = 2 AND rand = 0) --> column1 = 2
    let expr = column1_is_two().or(column1_is_two().and(rand_is_zero()));
    assert_eq!(simplify(expr), column1_is_two());

    // (column1 = 2 AND rand = 0) OR (column1 = 2 AND rand = 0)
    //   --> column1 = 2 AND (rand = 0 OR rand = 0)
    // Only the non-volatile factor is pulled out; both volatile comparisons remain.
    let expr = column1_is_two()
        .and(rand_is_zero())
        .or(column1_is_two().and(rand_is_zero()));
    assert_eq!(
        simplify(expr),
        column1_is_two().and(rand_is_zero().or(rand_is_zero()))
    );
}
5017
#[test]
fn simplify_fixed_size_binary_eq_lit() {
    let bytes: &[u8] = &[1, 2, 3];

    // c5 = <binary literal>
    // NOTE(review): c5 is presumably a FixedSizeBinary(3) column in
    // `expr_test_schema` — confirm against the schema definition.
    let comparison = col("c5").eq(lit(bytes));

    // Type coercion is expected to wrap the column in a cast to Binary.
    let coerced = coerce(comparison);
    let schema = expr_test_schema();
    let cast_column = col("c5")
        .cast_to(&DataType::Binary, schema.as_ref())
        .unwrap();
    assert_eq!(coerced, cast_column.eq(lit(bytes)));

    // Simplification is expected to drop the cast and turn the literal into a
    // FixedSizeBinary(3) value instead.
    let fixed_size_literal = Expr::Literal(
        ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec())),
        None,
    );
    assert_eq!(simplify(coerced), col("c5").eq(fixed_size_literal));
}
5045
#[test]
fn simplify_cast_literal() {
    // Target type shared by the two timestamp cases below.
    let nanos_with_offset = || {
        DataType::Timestamp(
            arrow::datatypes::TimeUnit::Nanosecond,
            Some("+00:00".into()),
        )
    };

    // CAST(123i32 AS Int64) folds into the literal 123i64.
    let int_cast = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
    assert_eq!(simplify(int_cast), lit(123i64));

    // Casting an Int64 literal to a nanosecond timestamp with a time zone
    // folds into a timestamp literal carrying the same value and zone.
    let timestamp_cast = Expr::Cast(Cast::new(
        Box::new(lit(1761630189642i64)),
        nanos_with_offset(),
    ));
    match simplify(timestamp_cast) {
        Expr::Literal(ScalarValue::TimestampNanosecond(Some(nanos), zone), _) => {
            assert_eq!(nanos, 1761630189642i64);
            assert_eq!(zone.as_deref(), Some("+00:00"));
        }
        other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
    }

    // Casting this string literal to the same timestamp type is expected to
    // make `simplify` return a timestamp parsing error.
    let string_cast = Expr::Cast(Cast::new(
        Box::new(lit("1761630189642")),
        nanos_with_offset(),
    ));

    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(test_schema());
    let simplifier = ExprSimplifier::new(context);
    let outcome = simplifier.simplify(string_cast);
    assert!(outcome.is_err(), "Expected error for invalid cast");
    let message = outcome.unwrap_err().to_string();
    assert_contains!(message, "Error parsing timestamp");
}
5095
5096 fn if_not_null(expr: Expr, then: bool) -> Expr {
5097 Expr::Case(Case {
5098 expr: Some(expr.is_not_null().into()),
5099 when_then_expr: vec![(lit(true).into(), lit(then).into())],
5100 else_expr: None,
5101 })
5102 }
5103}