datafusion_physical_plan/aggregates/group_values/multi_group_by/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::sync::Arc;
19
20use crate::aggregates::group_values::multi_group_by::Nulls;
21use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn};
22use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
23use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder};
24use datafusion_common::Result;
25use itertools::izip;
26
27/// An implementation of [`GroupColumn`] for booleans
28///
29/// Optimized to skip null buffer construction if the input is known to be non nullable
30///
31/// # Template parameters
32///
33/// `NULLABLE`: if the data can contain any nulls
34#[derive(Debug)]
35pub struct BooleanGroupValueBuilder<const NULLABLE: bool> {
36    buffer: BooleanBufferBuilder,
37    nulls: MaybeNullBufferBuilder,
38}
39
40impl<const NULLABLE: bool> BooleanGroupValueBuilder<NULLABLE> {
41    /// Create a new `BooleanGroupValueBuilder`
42    pub fn new() -> Self {
43        Self {
44            buffer: BooleanBufferBuilder::new(0),
45            nulls: MaybeNullBufferBuilder::new(),
46        }
47    }
48}
49
50impl<const NULLABLE: bool> GroupColumn for BooleanGroupValueBuilder<NULLABLE> {
51    fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool {
52        if NULLABLE {
53            let exist_null = self.nulls.is_null(lhs_row);
54            let input_null = array.is_null(rhs_row);
55            if let Some(result) = nulls_equal_to(exist_null, input_null) {
56                return result;
57            }
58        }
59
60        self.buffer.get_bit(lhs_row) == array.as_boolean().value(rhs_row)
61    }
62
63    fn append_val(&mut self, array: &ArrayRef, row: usize) -> Result<()> {
64        if NULLABLE {
65            if array.is_null(row) {
66                self.nulls.append(true);
67                self.buffer.append(bool::default());
68            } else {
69                self.nulls.append(false);
70                self.buffer.append(array.as_boolean().value(row));
71            }
72        } else {
73            self.buffer.append(array.as_boolean().value(row));
74        }
75
76        Ok(())
77    }
78
79    fn vectorized_equal_to(
80        &self,
81        lhs_rows: &[usize],
82        array: &ArrayRef,
83        rhs_rows: &[usize],
84        equal_to_results: &mut [bool],
85    ) {
86        let array = array.as_boolean();
87
88        let iter = izip!(
89            lhs_rows.iter(),
90            rhs_rows.iter(),
91            equal_to_results.iter_mut(),
92        );
93
94        for (&lhs_row, &rhs_row, equal_to_result) in iter {
95            // Has found not equal to in previous column, don't need to check
96            if !*equal_to_result {
97                continue;
98            }
99
100            if NULLABLE {
101                let exist_null = self.nulls.is_null(lhs_row);
102                let input_null = array.is_null(rhs_row);
103                if let Some(result) = nulls_equal_to(exist_null, input_null) {
104                    *equal_to_result = result;
105                    continue;
106                }
107            }
108
109            *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row);
110        }
111    }
112
113    fn vectorized_append(&mut self, array: &ArrayRef, rows: &[usize]) -> Result<()> {
114        let arr = array.as_boolean();
115
116        let null_count = array.null_count();
117        let num_rows = array.len();
118        let all_null_or_non_null = if null_count == 0 {
119            Nulls::None
120        } else if null_count == num_rows {
121            Nulls::All
122        } else {
123            Nulls::Some
124        };
125
126        match (NULLABLE, all_null_or_non_null) {
127            (true, Nulls::Some) => {
128                for &row in rows {
129                    if array.is_null(row) {
130                        self.nulls.append(true);
131                        self.buffer.append(bool::default());
132                    } else {
133                        self.nulls.append(false);
134                        self.buffer.append(arr.value(row));
135                    }
136                }
137            }
138
139            (true, Nulls::None) => {
140                self.nulls.append_n(rows.len(), false);
141                for &row in rows {
142                    self.buffer.append(arr.value(row));
143                }
144            }
145
146            (true, Nulls::All) => {
147                self.nulls.append_n(rows.len(), true);
148                self.buffer.append_n(rows.len(), bool::default());
149            }
150
151            (false, _) => {
152                for &row in rows {
153                    self.buffer.append(arr.value(row));
154                }
155            }
156        }
157
158        Ok(())
159    }
160
161    fn len(&self) -> usize {
162        self.buffer.len()
163    }
164
165    fn size(&self) -> usize {
166        self.buffer.capacity() / 8 + self.nulls.allocated_size()
167    }
168
169    fn build(self: Box<Self>) -> ArrayRef {
170        let Self { mut buffer, nulls } = *self;
171
172        let nulls = nulls.build();
173        if !NULLABLE {
174            assert!(nulls.is_none(), "unexpected nulls in non nullable input");
175        }
176
177        let arr = BooleanArray::new(buffer.finish(), nulls);
178
179        Arc::new(arr)
180    }
181
182    fn take_n(&mut self, n: usize) -> ArrayRef {
183        let first_n_nulls = if NULLABLE { self.nulls.take_n(n) } else { None };
184
185        let mut new_builder = BooleanBufferBuilder::new(self.buffer.len());
186        new_builder.append_packed_range(n..self.buffer.len(), self.buffer.as_slice());
187        std::mem::swap(&mut new_builder, &mut self.buffer);
188
189        // take only first n values from the original builder
190        new_builder.truncate(n);
191
192        Arc::new(BooleanArray::new(new_builder.finish(), first_n_nulls))
193    }
194}
195
196#[cfg(test)]
197mod tests {
198    use arrow::array::NullBufferBuilder;
199
200    use super::*;
201
202    #[test]
203    fn test_nullable_boolean_equal_to() {
204        let append = |builder: &mut BooleanGroupValueBuilder<true>,
205                      builder_array: &ArrayRef,
206                      append_rows: &[usize]| {
207            for &index in append_rows {
208                builder.append_val(builder_array, index).unwrap();
209            }
210        };
211
212        let equal_to = |builder: &BooleanGroupValueBuilder<true>,
213                        lhs_rows: &[usize],
214                        input_array: &ArrayRef,
215                        rhs_rows: &[usize],
216                        equal_to_results: &mut Vec<bool>| {
217            let iter = lhs_rows.iter().zip(rhs_rows.iter());
218            for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() {
219                equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row);
220            }
221        };
222
223        test_nullable_boolean_equal_to_internal(append, equal_to);
224    }
225
226    #[test]
227    fn test_nullable_primitive_vectorized_equal_to() {
228        let append = |builder: &mut BooleanGroupValueBuilder<true>,
229                      builder_array: &ArrayRef,
230                      append_rows: &[usize]| {
231            builder
232                .vectorized_append(builder_array, append_rows)
233                .unwrap();
234        };
235
236        let equal_to = |builder: &BooleanGroupValueBuilder<true>,
237                        lhs_rows: &[usize],
238                        input_array: &ArrayRef,
239                        rhs_rows: &[usize],
240                        equal_to_results: &mut Vec<bool>| {
241            builder.vectorized_equal_to(
242                lhs_rows,
243                input_array,
244                rhs_rows,
245                equal_to_results,
246            );
247        };
248
249        test_nullable_boolean_equal_to_internal(append, equal_to);
250    }
251
252    fn test_nullable_boolean_equal_to_internal<A, E>(mut append: A, mut equal_to: E)
253    where
254        A: FnMut(&mut BooleanGroupValueBuilder<true>, &ArrayRef, &[usize]),
255        E: FnMut(
256            &BooleanGroupValueBuilder<true>,
257            &[usize],
258            &ArrayRef,
259            &[usize],
260            &mut Vec<bool>,
261        ),
262    {
263        // Will cover such cases:
264        //   - exist null, input not null
265        //   - exist null, input null; values not equal
266        //   - exist null, input null; values equal
267        //   - exist not null, input null
268        //   - exist not null, input not null; values not equal
269        //   - exist not null, input not null; values equal
270
271        // Define PrimitiveGroupValueBuilder
272        let mut builder = BooleanGroupValueBuilder::<true>::new();
273        let builder_array = Arc::new(BooleanArray::from(vec![
274            None,
275            None,
276            None,
277            Some(true),
278            Some(false),
279            Some(true),
280        ])) as ArrayRef;
281        append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]);
282
283        // Define input array
284        let (values, _nulls) = BooleanArray::from(vec![
285            Some(true),
286            Some(false),
287            None,
288            None,
289            Some(true),
290            Some(true),
291        ])
292        .into_parts();
293
294        // explicitly build a null buffer where one of the null values also happens to match
295        let mut nulls = NullBufferBuilder::new(6);
296        nulls.append_non_null();
297        nulls.append_null(); // this sets Some(false) to null above
298        nulls.append_null();
299        nulls.append_null();
300        nulls.append_non_null();
301        nulls.append_non_null();
302        let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef;
303
304        // Check
305        let mut equal_to_results = vec![true; builder.len()];
306        equal_to(
307            &builder,
308            &[0, 1, 2, 3, 4, 5],
309            &input_array,
310            &[0, 1, 2, 3, 4, 5],
311            &mut equal_to_results,
312        );
313
314        assert!(!equal_to_results[0]);
315        assert!(equal_to_results[1]);
316        assert!(equal_to_results[2]);
317        assert!(!equal_to_results[3]);
318        assert!(!equal_to_results[4]);
319        assert!(equal_to_results[5]);
320    }
321
322    #[test]
323    fn test_not_nullable_primitive_equal_to() {
324        let append = |builder: &mut BooleanGroupValueBuilder<false>,
325                      builder_array: &ArrayRef,
326                      append_rows: &[usize]| {
327            for &index in append_rows {
328                builder.append_val(builder_array, index).unwrap();
329            }
330        };
331
332        let equal_to = |builder: &BooleanGroupValueBuilder<false>,
333                        lhs_rows: &[usize],
334                        input_array: &ArrayRef,
335                        rhs_rows: &[usize],
336                        equal_to_results: &mut Vec<bool>| {
337            let iter = lhs_rows.iter().zip(rhs_rows.iter());
338            for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() {
339                equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row);
340            }
341        };
342
343        test_not_nullable_boolean_equal_to_internal(append, equal_to);
344    }
345
346    #[test]
347    fn test_not_nullable_primitive_vectorized_equal_to() {
348        let append = |builder: &mut BooleanGroupValueBuilder<false>,
349                      builder_array: &ArrayRef,
350                      append_rows: &[usize]| {
351            builder
352                .vectorized_append(builder_array, append_rows)
353                .unwrap();
354        };
355
356        let equal_to = |builder: &BooleanGroupValueBuilder<false>,
357                        lhs_rows: &[usize],
358                        input_array: &ArrayRef,
359                        rhs_rows: &[usize],
360                        equal_to_results: &mut Vec<bool>| {
361            builder.vectorized_equal_to(
362                lhs_rows,
363                input_array,
364                rhs_rows,
365                equal_to_results,
366            );
367        };
368
369        test_not_nullable_boolean_equal_to_internal(append, equal_to);
370    }
371
372    fn test_not_nullable_boolean_equal_to_internal<A, E>(mut append: A, mut equal_to: E)
373    where
374        A: FnMut(&mut BooleanGroupValueBuilder<false>, &ArrayRef, &[usize]),
375        E: FnMut(
376            &BooleanGroupValueBuilder<false>,
377            &[usize],
378            &ArrayRef,
379            &[usize],
380            &mut Vec<bool>,
381        ),
382    {
383        // Will cover such cases:
384        //   - values equal
385        //   - values not equal
386
387        // Define PrimitiveGroupValueBuilder
388        let mut builder = BooleanGroupValueBuilder::<false>::new();
389        let builder_array = Arc::new(BooleanArray::from(vec![
390            Some(false),
391            Some(true),
392            Some(false),
393            Some(true),
394        ])) as ArrayRef;
395        append(&mut builder, &builder_array, &[0, 1, 2, 3]);
396
397        // Define input array
398        let input_array = Arc::new(BooleanArray::from(vec![
399            Some(false),
400            Some(false),
401            Some(true),
402            Some(true),
403        ])) as ArrayRef;
404
405        // Check
406        let mut equal_to_results = vec![true; builder.len()];
407        equal_to(
408            &builder,
409            &[0, 1, 2, 3],
410            &input_array,
411            &[0, 1, 2, 3],
412            &mut equal_to_results,
413        );
414
415        assert!(equal_to_results[0]);
416        assert!(!equal_to_results[1]);
417        assert!(!equal_to_results[2]);
418        assert!(equal_to_results[3]);
419    }
420
421    #[test]
422    fn test_nullable_boolean_vectorized_operation_special_case() {
423        // Test the special `all nulls` or `not nulls` input array case
424        // for vectorized append and equal to
425
426        let mut builder = BooleanGroupValueBuilder::<true>::new();
427
428        // All nulls input array
429        let all_nulls_input_array =
430            Arc::new(BooleanArray::from(vec![None, None, None, None, None])) as _;
431        builder
432            .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4])
433            .unwrap();
434
435        let mut equal_to_results = vec![true; all_nulls_input_array.len()];
436        builder.vectorized_equal_to(
437            &[0, 1, 2, 3, 4],
438            &all_nulls_input_array,
439            &[0, 1, 2, 3, 4],
440            &mut equal_to_results,
441        );
442
443        assert!(equal_to_results[0]);
444        assert!(equal_to_results[1]);
445        assert!(equal_to_results[2]);
446        assert!(equal_to_results[3]);
447        assert!(equal_to_results[4]);
448
449        // All not nulls input array
450        let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![
451            Some(false),
452            Some(true),
453            Some(false),
454            Some(true),
455            Some(true),
456        ])) as _;
457        builder
458            .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4])
459            .unwrap();
460
461        let mut equal_to_results = vec![true; all_not_nulls_input_array.len()];
462        builder.vectorized_equal_to(
463            &[5, 6, 7, 8, 9],
464            &all_not_nulls_input_array,
465            &[0, 1, 2, 3, 4],
466            &mut equal_to_results,
467        );
468
469        assert!(equal_to_results[0]);
470        assert!(equal_to_results[1]);
471        assert!(equal_to_results[2]);
472        assert!(equal_to_results[3]);
473        assert!(equal_to_results[4]);
474    }
475}