datafusion_physical_plan/aggregates/group_values/null_builder.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::NullBufferBuilder;
19use arrow::buffer::NullBuffer;
20
/// Builder for an (optional) null mask
///
/// Optimized to avoid creating the bitmask when all values are non-null
#[derive(Debug)]
pub(crate) struct MaybeNullBufferBuilder {
    /// Note this is an Arrow *VALIDITY* buffer: a bit is `false` for a null
    /// row and `true` for a non-null row.
    nulls: NullBufferBuilder,
}
30
31impl MaybeNullBufferBuilder {
32 /// Create a new builder
33 pub fn new() -> Self {
34 Self {
35 nulls: NullBufferBuilder::new(0),
36 }
37 }
38
39 /// Return true if the row at index `row` is null
40 pub fn is_null(&self, row: usize) -> bool {
41 match self.nulls.as_slice() {
42 // validity mask means a unset bit is NULL
43 Some(_) => !self.nulls.is_valid(row),
44 None => false,
45 }
46 }
47
48 /// Set the nullness of the next row to `is_null`
49 ///
50 /// If `value` is true, the row is null.
51 /// If `value` is false, the row is non null
52 pub fn append(&mut self, is_null: bool) {
53 self.nulls.append(!is_null)
54 }
55
56 pub fn append_n(&mut self, n: usize, is_null: bool) {
57 if is_null {
58 self.nulls.append_n_nulls(n);
59 } else {
60 self.nulls.append_n_non_nulls(n);
61 }
62 }
63
64 /// return the number of heap allocated bytes used by this structure to store boolean values
65 pub fn allocated_size(&self) -> usize {
66 // NullBufferBuilder builder::allocated_size returns capacity in bits
67 self.nulls.allocated_size() / 8
68 }
69
70 /// Return a NullBuffer representing the accumulated nulls so far
71 pub fn build(mut self) -> Option<NullBuffer> {
72 self.nulls.finish()
73 }
74
75 /// Returns a NullBuffer representing the first `n` rows accumulated so far
76 /// shifting any remaining down by `n`
77 pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> {
78 // Copy over the values at n..len-1 values to the start of a
79 // new builder and leave it in self
80 //
81 // TODO: it would be great to use something like `set_bits` from arrow here.
82 let mut new_builder = NullBufferBuilder::new(self.nulls.len());
83 for i in n..self.nulls.len() {
84 new_builder.append(self.nulls.is_valid(i));
85 }
86 std::mem::swap(&mut new_builder, &mut self.nulls);
87
88 // take only first n values from the original builder
89 new_builder.truncate(n);
90 new_builder.finish()
91 }
92}