datafusion_physical_expr/expressions/binary/
kernels.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! This module contains computation kernels that are specific to
//! datafusion and not (yet) targeted for porting upstream to arrow
20use arrow::array::*;
21use arrow::compute::kernels::bitwise::{
22    bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar, bitwise_shift_left,
23    bitwise_shift_left_scalar, bitwise_shift_right, bitwise_shift_right_scalar,
24    bitwise_xor, bitwise_xor_scalar,
25};
26use arrow::compute::kernels::boolean::not;
27use arrow::compute::kernels::comparison::{regexp_is_match, regexp_is_match_scalar};
28use arrow::datatypes::DataType;
29use arrow::error::ArrowError;
30use datafusion_common::{internal_err, plan_err};
31use datafusion_common::{Result, ScalarValue};
32
33use std::sync::Arc;
34
/// Downcasts `$LEFT` and `$RIGHT` to `$ARRAY_TYPE` and evaluates
/// `$KERNEL(left, right)`, yielding the output as `Ok(ArrayRef)`.
///
/// Must be expanded inside a function returning `Result<ArrayRef>`:
/// - a kernel failure is propagated with `?`;
/// - an operand that is not a `$ARRAY_TYPE` produces an internal error
///   instead of panicking.
macro_rules! call_kernel {
    ($LEFT:expr, $RIGHT:expr, $KERNEL:expr, $ARRAY_TYPE:ident) => {{
        // Bind the operands once so each macro argument is evaluated a single time.
        let (lhs, rhs) = (&$LEFT, &$RIGHT);
        match (
            lhs.as_any().downcast_ref::<$ARRAY_TYPE>(),
            rhs.as_any().downcast_ref::<$ARRAY_TYPE>(),
        ) {
            (Some(left), Some(right)) => {
                let result: $ARRAY_TYPE = $KERNEL(left, right)?;
                Ok(Arc::new(result) as ArrayRef)
            }
            // The generated callers dispatch on the left array's type only, so
            // the right operand is not guaranteed to downcast; report the
            // mismatch rather than aborting the process.
            _ => internal_err!(
                "Cannot downcast operands of type {} and {} to {}",
                lhs.data_type(),
                rhs.data_type(),
                stringify!($ARRAY_TYPE)
            ),
        }
    }};
}
44
/// Generates `pub(crate) fn $FUNC(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef>`.
///
/// The generated function inspects the data type of `left` and, for each of
/// the eight signed/unsigned integer types, hands both arrays to `$KERNEL`
/// through `call_kernel!`. Any other data type yields a planning error that
/// names the unsupported type and the kernel.
macro_rules! create_left_integral_dyn_kernel {
    ($FUNC:ident, $KERNEL:ident) => {
        pub(crate) fn $FUNC(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef> {
            match left.data_type() {
                // Signed integers
                DataType::Int8 => call_kernel!(left, right, $KERNEL, Int8Array),
                DataType::Int16 => call_kernel!(left, right, $KERNEL, Int16Array),
                DataType::Int32 => call_kernel!(left, right, $KERNEL, Int32Array),
                DataType::Int64 => call_kernel!(left, right, $KERNEL, Int64Array),
                // Unsigned integers
                DataType::UInt8 => call_kernel!(left, right, $KERNEL, UInt8Array),
                DataType::UInt16 => call_kernel!(left, right, $KERNEL, UInt16Array),
                DataType::UInt32 => call_kernel!(left, right, $KERNEL, UInt32Array),
                DataType::UInt64 => call_kernel!(left, right, $KERNEL, UInt64Array),
                unsupported => plan_err!(
                    "Data type {} not supported for binary operation '{}' on dyn arrays",
                    unsupported,
                    stringify!($KERNEL)
                ),
            }
        }
    };
}
84
// Array-vs-array bitwise entry points. Each invocation expands to a
// `pub(crate) fn <name>(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef>`
// that dispatches on the left array's integer type and calls the arrow
// kernel named in the second argument.
create_left_integral_dyn_kernel!(bitwise_or_dyn, bitwise_or);
create_left_integral_dyn_kernel!(bitwise_xor_dyn, bitwise_xor);
create_left_integral_dyn_kernel!(bitwise_and_dyn, bitwise_and);
create_left_integral_dyn_kernel!(bitwise_shift_right_dyn, bitwise_shift_right);
create_left_integral_dyn_kernel!(bitwise_shift_left_dyn, bitwise_shift_left);
90
/// Downcasts `$LEFT` to `$ARRAY_TYPE`, converts `$RIGHT` (a `ScalarValue`) to
/// the native `$TYPE`, and evaluates `$KERNEL(array, scalar)`, producing a
/// `Result<ArrayRef>`.
///
/// * A null scalar short-circuits to an all-null array with the same data
///   type and length as `$LEFT`.
/// * A scalar that cannot be converted to `$TYPE`, or a kernel failure, is
///   returned as an `Err` instead of panicking.
macro_rules! call_scalar_kernel {
    ($LEFT:expr, $RIGHT:expr, $KERNEL:ident, $ARRAY_TYPE:ident, $TYPE:ty) => {{
        let len = $LEFT.len();
        // The generated callers match on the array's data type before
        // expanding this macro, so a failed downcast is a logic bug.
        let array = $LEFT
            .as_any()
            .downcast_ref::<$ARRAY_TYPE>()
            .expect("array data type was matched before downcasting");
        let scalar = $RIGHT;
        if scalar.is_null() {
            Ok(new_null_array(array.data_type(), len))
        } else {
            // The scalar's type is not checked by the caller, so surface a
            // conversion failure as an error; `?` converts the arrow error
            // from the kernel into the DataFusion error type.
            <$TYPE>::try_from(scalar).and_then(|value| {
                let result: $ARRAY_TYPE = $KERNEL(array, value)?;
                Ok(Arc::new(result) as ArrayRef)
            })
        }
    }};
}
106
/// Generates
/// `pub(crate) fn $FUNC(array: &dyn Array, scalar: ScalarValue) -> Option<Result<ArrayRef>>`.
///
/// The generated function dispatches on the data type of `array` and, for
/// each of the eight signed/unsigned integer types, applies `$KERNEL` to the
/// array and the scalar through `call_scalar_kernel!`. Any other data type
/// yields a planning error. The outcome is always wrapped in `Some`.
macro_rules! create_left_integral_dyn_scalar_kernel {
    ($FUNC:ident, $KERNEL:ident) => {
        pub(crate) fn $FUNC(array: &dyn Array, scalar: ScalarValue) -> Option<Result<ArrayRef>> {
            Some(match array.data_type() {
                // Signed integers
                DataType::Int8 => call_scalar_kernel!(array, scalar, $KERNEL, Int8Array, i8),
                DataType::Int16 => call_scalar_kernel!(array, scalar, $KERNEL, Int16Array, i16),
                DataType::Int32 => call_scalar_kernel!(array, scalar, $KERNEL, Int32Array, i32),
                DataType::Int64 => call_scalar_kernel!(array, scalar, $KERNEL, Int64Array, i64),
                // Unsigned integers
                DataType::UInt8 => call_scalar_kernel!(array, scalar, $KERNEL, UInt8Array, u8),
                DataType::UInt16 => call_scalar_kernel!(array, scalar, $KERNEL, UInt16Array, u16),
                DataType::UInt32 => call_scalar_kernel!(array, scalar, $KERNEL, UInt32Array, u32),
                DataType::UInt64 => call_scalar_kernel!(array, scalar, $KERNEL, UInt64Array, u64),
                unsupported => plan_err!(
                    "Data type {} not supported for binary operation '{}' on dyn arrays",
                    unsupported,
                    stringify!($KERNEL)
                ),
            })
        }
    };
}
131
// Array-vs-scalar bitwise entry points. Each invocation expands to a
// `pub(crate) fn <name>(array: &dyn Array, scalar: ScalarValue) -> Option<Result<ArrayRef>>`
// that dispatches on the array's integer type and calls the arrow scalar
// kernel named in the second argument.
create_left_integral_dyn_scalar_kernel!(bitwise_and_dyn_scalar, bitwise_and_scalar);
create_left_integral_dyn_scalar_kernel!(bitwise_or_dyn_scalar, bitwise_or_scalar);
create_left_integral_dyn_scalar_kernel!(bitwise_xor_dyn_scalar, bitwise_xor_scalar);
create_left_integral_dyn_scalar_kernel!(
    bitwise_shift_right_dyn_scalar,
    bitwise_shift_right_scalar
);
create_left_integral_dyn_scalar_kernel!(
    bitwise_shift_left_dyn_scalar,
    bitwise_shift_left_scalar
);
143
144/// Concatenates two `StringViewArray`s element-wise.  
145/// If either element is `Null`, the result element is also `Null`.
146///
147/// # Errors
148/// - Returns an error if the input arrays have different lengths.  
149/// - Returns an error if any concatenated string exceeds `u32::MAX` (≈4 GB) in length.
150pub fn concat_elements_utf8view(
151    left: &StringViewArray,
152    right: &StringViewArray,
153) -> std::result::Result<StringViewArray, ArrowError> {
154    if left.len() != right.len() {
155        return Err(ArrowError::ComputeError(format!(
156            "Arrays must have the same length: {} != {}",
157            left.len(),
158            right.len()
159        )));
160    }
161    let capacity = left.len();
162    let mut result = StringViewBuilder::with_capacity(capacity);
163
164    // Avoid reallocations by writing to a reused buffer (note we
165    // could be even more efficient r by creating the view directly
166    // here and avoid the buffer but that would be more complex)
167    let mut buffer = String::new();
168
169    for (left, right) in left.iter().zip(right.iter()) {
170        if let (Some(left), Some(right)) = (left, right) {
171            use std::fmt::Write;
172            buffer.clear();
173            write!(&mut buffer, "{left}{right}")
174                .expect("writing into string buffer failed");
175            result.try_append_value(&buffer)?;
176        } else {
177            // at least one of the values is null, so the output is also null
178            result.append_null()
179        }
180    }
181    Ok(result.finish())
182}
183
/// Runs the arrow `regexp_is_match` kernel on two string arrays of type
/// `$ARRAYTYPE` and yields the boolean result as `Ok(Arc<BooleanArray>)`.
///
/// * `$NOT`  - when true, the boolean result is negated.
/// * `$FLAG` - when true, an `"i"` flag is supplied for every row; otherwise
///   no flags array is passed to the kernel.
///
/// Must be expanded inside a function returning a DataFusion `Result`:
/// kernel errors are propagated with `?`, and a right-hand array that is not
/// a `$ARRAYTYPE` causes an early `return` with an internal error.
macro_rules! regexp_is_match_flag {
    ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
        // The caller has already matched on the left array's data type, so
        // this downcast failing would be a logic bug.
        let ll = $LEFT
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .expect("failed to downcast array");
        // The right array's type is not checked by the caller; report a
        // mismatch as an error rather than panicking.
        let Some(rr) = $RIGHT.as_any().downcast_ref::<$ARRAYTYPE>() else {
            return internal_err!(
                "Cannot downcast right array of type {} to {} for regex_match_dyn",
                $RIGHT.data_type(),
                stringify!($ARRAYTYPE)
            );
        };

        let flag = if $FLAG {
            Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
        } else {
            None
        };
        let matched = regexp_is_match(ll, rr, flag.as_ref())?;
        let result = if $NOT { not(&matched)? } else { matched };
        Ok(Arc::new(result))
    }};
}
208
209pub(crate) fn regex_match_dyn(
210    left: ArrayRef,
211    right: ArrayRef,
212    not_match: bool,
213    flag: bool,
214) -> Result<ArrayRef> {
215    match left.data_type() {
216        DataType::Utf8 => {
217            regexp_is_match_flag!(left, right, StringArray, not_match, flag)
218        }
219        DataType::Utf8View => {
220            regexp_is_match_flag!(left, right, StringViewArray, not_match, flag)
221        }
222        DataType::LargeUtf8 => {
223            regexp_is_match_flag!(left, right, LargeStringArray, not_match, flag)
224        }
225        other => internal_err!(
226            "Data type {} not supported for regex_match_dyn on string array",
227            other
228        ),
229    }
230}
231
/// Runs the arrow `regexp_is_match_scalar` kernel on a string array of type
/// `$ARRAYTYPE` and a scalar, yielding a `Result` holding an
/// `Arc<BooleanArray>`.
///
/// * `$RIGHT` - value exposing `try_as_str()`; it must hold a non-null string.
/// * `$NOT`   - when true, the boolean result is negated.
/// * `$FLAG`  - when true, the `"i"` flag is passed to the kernel.
///
/// The macro is a pure expression (no `?` / `return`), so it can be used in
/// functions that do not themselves return `Result`. A kernel or negation
/// failure, or a scalar that is not a non-null string, is reported as an
/// internal error.
macro_rules! regexp_is_match_flag_scalar {
    ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
        let ll = $LEFT
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .expect("failed to downcast array");

        if let Some(Some(string_value)) = $RIGHT.try_as_str() {
            let flag = $FLAG.then_some("i");
            // Fold the optional negation into the same fallible chain so a
            // failure there is reported instead of unwrapped.
            let evaluated = regexp_is_match_scalar(ll, &string_value, flag).and_then(
                |matched| {
                    if $NOT {
                        not(&matched)
                    } else {
                        Ok(matched)
                    }
                },
            );
            match evaluated {
                // Left as a concrete `Arc<BooleanArray>`: some callers keep
                // working with the boolean array before erasing its type.
                Ok(array) => Ok(Arc::new(array)),
                Err(e) => internal_err!("failed to call 'regex_match_dyn_scalar' {}", e),
            }
        } else {
            internal_err!(
                "failed to cast literal value {} for operation 'regex_match_dyn_scalar'",
                $RIGHT
            )
        }
    }};
}
259
260pub(crate) fn regex_match_dyn_scalar(
261    left: &dyn Array,
262    right: ScalarValue,
263    not_match: bool,
264    flag: bool,
265) -> Option<Result<ArrayRef>> {
266    let result: Result<ArrayRef> = match left.data_type() {
267        DataType::Utf8 => {
268            regexp_is_match_flag_scalar!(left, right, StringArray, not_match, flag)
269        }
270        DataType::Utf8View => {
271            regexp_is_match_flag_scalar!(left, right, StringViewArray, not_match, flag)
272        }
273        DataType::LargeUtf8 => {
274            regexp_is_match_flag_scalar!(left, right, LargeStringArray, not_match, flag)
275        }
276        DataType::Dictionary(_, _) => {
277            let values = left.as_any_dictionary().values();
278
279            match values.data_type() {
280                DataType::Utf8 => regexp_is_match_flag_scalar!(values, right, StringArray, not_match, flag),
281                DataType::Utf8View => regexp_is_match_flag_scalar!(values, right, StringViewArray, not_match, flag),
282                DataType::LargeUtf8 => regexp_is_match_flag_scalar!(values, right, LargeStringArray, not_match, flag),
283                other => internal_err!(
284                    "Data type {} not supported as a dictionary value type for operation 'regex_match_dyn_scalar' on string array",
285                    other
286                ),
287            }.map(
288                // downcast_dictionary_array duplicates code per possible key type, so we aim to do all prep work before
289                |evaluated_values| downcast_dictionary_array! {
290                    left => {
291                        let unpacked_dict = evaluated_values.take_iter(left.keys().iter().map(|opt| opt.map(|v| v as _))).collect::<BooleanArray>();
292                        Arc::new(unpacked_dict) as ArrayRef
293                    },
294                    _ => unreachable!(),
295                }
296            )
297        }
298        other => internal_err!(
299                "Data type {} not supported for operation 'regex_match_dyn_scalar' on string array",
300                other
301        ),
302    };
303    Some(result)
304}