datafusion_optimizer/
plan_signature.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{
19    collections::hash_map::DefaultHasher,
20    hash::{Hash, Hasher},
21    num::NonZeroUsize,
22};
23
24use datafusion_common::tree_node::TreeNodeRecursion;
25use datafusion_expr::LogicalPlan;
26
27/// Non-unique identifier of a [`LogicalPlan`].
28///
29/// See [`LogicalPlanSignature::new`] for details.
30#[derive(Clone, Copy, PartialEq, Eq, Hash)]
31pub struct LogicalPlanSignature {
32    node_number: NonZeroUsize,
33    plan_hash: u64,
34}
35
36impl LogicalPlanSignature {
37    /// Returns [`LogicalPlanSignature`] of the given [`LogicalPlan`].
38    ///
39    /// It is a kind of [`LogicalPlan`] hashing with stronger guarantees.
40    ///
41    /// # Guarantees
42    ///
43    /// Consider two [`LogicalPlan`]s `p1` and `p2`.
44    ///
45    /// If `p1` and `p2` have a different number of [`LogicalPlan`]s, then
46    /// they will have different [`LogicalPlanSignature`]s.
47    ///
48    /// If `p1` and `p2` have a different [`Hash`], then
49    /// they will have different [`LogicalPlanSignature`]s.
50    ///
51    /// # Caveats
52    ///
53    /// The intention of [`LogicalPlanSignature`] is to have a lower chance
54    /// of hash collisions.
55    ///
56    /// There exist different [`LogicalPlan`]s with the same
57    /// [`LogicalPlanSignature`].
58    ///
59    /// When two [`LogicalPlan`]s differ only in metadata, then they will have
60    /// the same [`LogicalPlanSignature`]s (due to hash implementation in
61    /// [`LogicalPlan`]).
62    pub fn new(plan: &LogicalPlan) -> Self {
63        let mut hasher = DefaultHasher::new();
64        plan.hash(&mut hasher);
65
66        Self {
67            node_number: get_node_number(plan),
68            plan_hash: hasher.finish(),
69        }
70    }
71}
72
73/// Get total number of [`LogicalPlan`]s in the plan.
74fn get_node_number(plan: &LogicalPlan) -> NonZeroUsize {
75    let mut node_number = 0;
76    plan.apply_with_subqueries(|_plan| {
77        node_number += 1;
78        Ok(TreeNodeRecursion::Continue)
79    })
80    // Closure always return Ok
81    .unwrap();
82    // Visitor must have at least visited the root,
83    // so v.node_number is at least 1.
84    NonZeroUsize::new(node_number).unwrap()
85}
86
87#[cfg(test)]
88mod tests {
89    use std::sync::Arc;
90
91    use datafusion_common::{DFSchema, Result};
92    use datafusion_expr::{lit, LogicalPlan};
93
94    use crate::plan_signature::get_node_number;
95
96    #[test]
97    fn node_number_for_some_plan() -> Result<()> {
98        let schema = Arc::new(DFSchema::empty());
99
100        let one_node_plan =
101            Arc::new(LogicalPlan::EmptyRelation(datafusion_expr::EmptyRelation {
102                produce_one_row: false,
103                schema: Arc::clone(&schema),
104            }));
105
106        assert_eq!(1, get_node_number(&one_node_plan).get());
107
108        let two_node_plan = Arc::new(LogicalPlan::Projection(
109            datafusion_expr::Projection::try_new(vec![lit(1), lit(2)], one_node_plan)?,
110        ));
111
112        assert_eq!(2, get_node_number(&two_node_plan).get());
113
114        let five_node_plan = Arc::new(LogicalPlan::Union(datafusion_expr::Union {
115            inputs: vec![Arc::clone(&two_node_plan), two_node_plan],
116            schema,
117        }));
118
119        assert_eq!(5, get_node_number(&five_node_plan).get());
120
121        Ok(())
122    }
123}