datafusion_physical_plan/aggregates/group_values/
null_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::NullBufferBuilder;
19use arrow::buffer::NullBuffer;
20
/// Builder for an (optional) null mask
///
/// Optimized to avoid creating the bitmask when all values are non-null.
///
/// This is a thin wrapper around Arrow's [`NullBufferBuilder`]. Its methods
/// speak in terms of *nullness* (`is_null`), while the wrapped builder
/// stores *validity*, so flags are inverted at the boundary.
#[derive(Debug)]
pub(crate) struct MaybeNullBufferBuilder {
    /// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true
    /// for non-nulls)
    nulls: NullBufferBuilder,
}
30
31impl MaybeNullBufferBuilder {
32    /// Create a new builder
33    pub fn new() -> Self {
34        Self {
35            nulls: NullBufferBuilder::new(0),
36        }
37    }
38
39    /// Return true if the row at index `row` is null
40    pub fn is_null(&self, row: usize) -> bool {
41        match self.nulls.as_slice() {
42            // validity mask means a unset bit is NULL
43            Some(_) => !self.nulls.is_valid(row),
44            None => false,
45        }
46    }
47
48    /// Set the nullness of the next row to `is_null`
49    ///
50    /// If `value` is true, the row is null.
51    /// If `value` is false, the row is non null
52    pub fn append(&mut self, is_null: bool) {
53        self.nulls.append(!is_null)
54    }
55
56    pub fn append_n(&mut self, n: usize, is_null: bool) {
57        if is_null {
58            self.nulls.append_n_nulls(n);
59        } else {
60            self.nulls.append_n_non_nulls(n);
61        }
62    }
63
64    /// return the number of heap allocated bytes used by this structure to store boolean values
65    pub fn allocated_size(&self) -> usize {
66        // NullBufferBuilder builder::allocated_size returns capacity in bits
67        self.nulls.allocated_size() / 8
68    }
69
70    /// Return a NullBuffer representing the accumulated nulls so far
71    pub fn build(mut self) -> Option<NullBuffer> {
72        self.nulls.finish()
73    }
74
75    /// Returns a NullBuffer representing the first `n` rows accumulated so far
76    /// shifting any remaining down by `n`
77    pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> {
78        // Copy over the values at  n..len-1 values to the start of a
79        // new builder and leave it in self
80        //
81        // TODO: it would be great to use something like `set_bits` from arrow here.
82        let mut new_builder = NullBufferBuilder::new(self.nulls.len());
83        for i in n..self.nulls.len() {
84            new_builder.append(self.nulls.is_valid(i));
85        }
86        std::mem::swap(&mut new_builder, &mut self.nulls);
87
88        // take only first n values from the original builder
89        new_builder.truncate(n);
90        new_builder.finish()
91    }
92}