datafusion/
schema_equivalence.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::{DataType, Field, Fields, Schema};
19
20/// Verifies whether the original planned schema can be satisfied with data
21/// adhering to the candidate schema. In practice, this is equality check on the
22/// schemas except that original schema can have nullable fields where candidate
23/// is constrained to not provide null data.
24pub(crate) fn schema_satisfied_by(original: &Schema, candidate: &Schema) -> bool {
25    original.metadata() == candidate.metadata()
26        && fields_satisfied_by(original.fields(), candidate.fields())
27}
28
29/// See [`schema_satisfied_by`] for the contract.
30fn fields_satisfied_by(original: &Fields, candidate: &Fields) -> bool {
31    original.len() == candidate.len()
32        && original
33            .iter()
34            .zip(candidate)
35            .all(|(original, candidate)| field_satisfied_by(original, candidate))
36}
37
38/// See [`schema_satisfied_by`] for the contract.
39fn field_satisfied_by(original: &Field, candidate: &Field) -> bool {
40    original.name() == candidate.name()
41        && (original.is_nullable() || !candidate.is_nullable())
42        && original.metadata() == candidate.metadata()
43        && data_type_satisfied_by(original.data_type(), candidate.data_type())
44}
45
46/// See [`schema_satisfied_by`] for the contract.
47fn data_type_satisfied_by(original: &DataType, candidate: &DataType) -> bool {
48    match (original, candidate) {
49        (DataType::List(original_field), DataType::List(candidate_field)) => {
50            field_satisfied_by(original_field, candidate_field)
51        }
52
53        (DataType::ListView(original_field), DataType::ListView(candidate_field)) => {
54            field_satisfied_by(original_field, candidate_field)
55        }
56
57        (
58            DataType::FixedSizeList(original_field, original_size),
59            DataType::FixedSizeList(candidate_field, candidate_size),
60        ) => {
61            original_size == candidate_size
62                && field_satisfied_by(original_field, candidate_field)
63        }
64
65        (DataType::LargeList(original_field), DataType::LargeList(candidate_field)) => {
66            field_satisfied_by(original_field, candidate_field)
67        }
68
69        (
70            DataType::LargeListView(original_field),
71            DataType::LargeListView(candidate_field),
72        ) => field_satisfied_by(original_field, candidate_field),
73
74        (DataType::Struct(original_fields), DataType::Struct(candidate_fields)) => {
75            fields_satisfied_by(original_fields, candidate_fields)
76        }
77
78        // TODO (DataType::Union(, _), DataType::Union(_, _)) => {}
79        // TODO (DataType::Dictionary(_, _), DataType::Dictionary(_, _)) => {}
80        // TODO (DataType::Map(_, _), DataType::Map(_, _)) => {}
81        // TODO (DataType::RunEndEncoded(_, _), DataType::RunEndEncoded(_, _)) => {}
82        _ => original == candidate,
83    }
84}