datafusion_optimizer/simplify_expressions/inlist_simplifier.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module implements a rule that simplifies the values for `InList`s
19
20use super::THRESHOLD_INLINE_INLIST;
21
22use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
23use datafusion_common::Result;
24use datafusion_expr::expr::InList;
25use datafusion_expr::Expr;
26
27pub(super) struct ShortenInListSimplifier {}
28
29impl ShortenInListSimplifier {
30 pub(super) fn new() -> Self {
31 Self {}
32 }
33}
34
35impl TreeNodeRewriter for ShortenInListSimplifier {
36 type Node = Expr;
37
38 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
39 // if expr is a single column reference:
40 // expr IN (A, B, ...) --> (expr = A) OR (expr = B) OR (expr = C)
41 if let Expr::InList(InList {
42 ref expr,
43 ref list,
44 negated,
45 }) = expr
46 {
47 if !list.is_empty()
48 && (
49 // For lists with only 1 value we allow more complex expressions to be simplified
50 // e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1'
51 // for more than one we avoid repeating this potentially expensive
52 // expressions
53 list.len() == 1
54 || list.len() <= THRESHOLD_INLINE_INLIST
55 && expr.try_as_col().is_some()
56 )
57 {
58 let first_val = list[0].clone();
59 if negated {
60 return Ok(Transformed::yes(list.iter().skip(1).cloned().fold(
61 (*expr.clone()).not_eq(first_val),
62 |acc, y| {
63 // Note that `A and B and C and D` is a left-deep tree structure
64 // as such we want to maintain this structure as much as possible
65 // to avoid reordering the expression during each optimization
66 // pass.
67 //
68 // Left-deep tree structure for `A and B and C and D`:
69 // ```
70 // &
71 // / \
72 // & D
73 // / \
74 // & C
75 // / \
76 // A B
77 // ```
78 //
79 // The code below maintain the left-deep tree structure.
80 acc.and((*expr.clone()).not_eq(y))
81 },
82 )));
83 } else {
84 return Ok(Transformed::yes(list.iter().skip(1).cloned().fold(
85 (*expr.clone()).eq(first_val),
86 |acc, y| {
87 // Same reasoning as above
88 acc.or((*expr.clone()).eq(y))
89 },
90 )));
91 }
92 }
93 }
94
95 Ok(Transformed::no(expr))
96 }
97}