datafusion_optimizer/simplify_expressions/
inlist_simplifier.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module implements a rule that simplifies the values for `InList`s
19
20use super::THRESHOLD_INLINE_INLIST;
21
22use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
23use datafusion_common::Result;
24use datafusion_expr::expr::InList;
25use datafusion_expr::Expr;
26
27pub(super) struct ShortenInListSimplifier {}
28
29impl ShortenInListSimplifier {
30    pub(super) fn new() -> Self {
31        Self {}
32    }
33}
34
35impl TreeNodeRewriter for ShortenInListSimplifier {
36    type Node = Expr;
37
38    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
39        // if expr is a single column reference:
40        // expr IN (A, B, ...) --> (expr = A) OR (expr = B) OR (expr = C)
41        if let Expr::InList(InList {
42            ref expr,
43            ref list,
44            negated,
45        }) = expr
46        {
47            if !list.is_empty()
48                && (
49                    // For lists with only 1 value we allow more complex expressions to be simplified
50                    // e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1'
51                    // for more than one we avoid repeating this potentially expensive
52                    // expressions
53                    list.len() == 1
54                        || list.len() <= THRESHOLD_INLINE_INLIST
55                            && expr.try_as_col().is_some()
56                )
57            {
58                let first_val = list[0].clone();
59                if negated {
60                    return Ok(Transformed::yes(list.iter().skip(1).cloned().fold(
61                        (*expr.clone()).not_eq(first_val),
62                        |acc, y| {
63                            // Note that `A and B and C and D` is a left-deep tree structure
64                            // as such we want to maintain this structure as much as possible
65                            // to avoid reordering the expression during each optimization
66                            // pass.
67                            //
68                            // Left-deep tree structure for `A and B and C and D`:
69                            // ```
70                            //        &
71                            //       / \
72                            //      &   D
73                            //     / \
74                            //    &   C
75                            //   / \
76                            //  A   B
77                            // ```
78                            //
79                            // The code below maintain the left-deep tree structure.
80                            acc.and((*expr.clone()).not_eq(y))
81                        },
82                    )));
83                } else {
84                    return Ok(Transformed::yes(list.iter().skip(1).cloned().fold(
85                        (*expr.clone()).eq(first_val),
86                        |acc, y| {
87                            // Same reasoning as above
88                            acc.or((*expr.clone()).eq(y))
89                        },
90                    )));
91                }
92            }
93        }
94
95        Ok(Transformed::no(expr))
96    }
97}