datafusion_common/scalar/
cache.rs1use std::iter::repeat_n;
21use std::sync::{Arc, LazyLock, Mutex};
22
23use arrow::array::{new_null_array, Array, ArrayRef, PrimitiveArray};
24use arrow::datatypes::{
25 ArrowDictionaryKeyType, DataType, Int16Type, Int32Type, Int64Type, Int8Type,
26 UInt16Type, UInt32Type, UInt64Type, UInt8Type,
27};
28
29const MAX_CACHE_SIZE: usize = 1024 * 1024;
31
32#[derive(Debug)]
40struct KeyArrayCache<K: ArrowDictionaryKeyType> {
41 cache: Option<(usize, bool, PrimitiveArray<K>)>, }
43
44impl<K: ArrowDictionaryKeyType> Default for KeyArrayCache<K> {
45 fn default() -> Self {
46 Self { cache: None }
47 }
48}
49
50impl<K: ArrowDictionaryKeyType> KeyArrayCache<K> {
51 fn get_or_create(&mut self, num_rows: usize, is_null: bool) -> PrimitiveArray<K> {
53 if num_rows > MAX_CACHE_SIZE {
55 return self.create_key_array(num_rows, is_null);
57 }
58
59 match &self.cache {
60 Some((cached_num_rows, cached_is_null, cached_array))
61 if *cached_num_rows == num_rows && *cached_is_null == is_null =>
62 {
63 cached_array.clone()
65 }
66 _ => {
67 let key_array = self.create_key_array(num_rows, is_null);
69 self.cache = Some((num_rows, is_null, key_array.clone()));
70 key_array
71 }
72 }
73 }
74
75 fn create_key_array(&self, num_rows: usize, is_null: bool) -> PrimitiveArray<K> {
77 let key_array: PrimitiveArray<K> = repeat_n(
78 if is_null {
79 None
80 } else {
81 Some(K::default_value())
82 },
83 num_rows,
84 )
85 .collect();
86 key_array
87 }
88}
89
90#[derive(Debug, Default)]
93struct NullArrayCache {
94 cache: Option<(usize, ArrayRef)>, }
96
97impl NullArrayCache {
98 fn get_or_create(&mut self, num_rows: usize) -> ArrayRef {
100 if num_rows > MAX_CACHE_SIZE {
102 return new_null_array(&DataType::Null, num_rows);
104 }
105
106 match &self.cache {
107 Some((cached_num_rows, cached_array)) if *cached_num_rows == num_rows => {
108 Arc::clone(cached_array)
110 }
111 _ => {
112 let null_array = new_null_array(&DataType::Null, num_rows);
114 self.cache = Some((num_rows, Arc::clone(&null_array)));
115 null_array
116 }
117 }
118 }
119}
120
121#[derive(Debug, Default)]
123struct ArrayCaches {
124 cache_i8: KeyArrayCache<Int8Type>,
125 cache_i16: KeyArrayCache<Int16Type>,
126 cache_i32: KeyArrayCache<Int32Type>,
127 cache_i64: KeyArrayCache<Int64Type>,
128 cache_u8: KeyArrayCache<UInt8Type>,
129 cache_u16: KeyArrayCache<UInt16Type>,
130 cache_u32: KeyArrayCache<UInt32Type>,
131 cache_u64: KeyArrayCache<UInt64Type>,
132 null_cache: NullArrayCache,
133}
134
135static ARRAY_CACHES: LazyLock<Mutex<ArrayCaches>> =
136 LazyLock::new(|| Mutex::new(ArrayCaches::default()));
137
138fn get_array_caches() -> &'static Mutex<ArrayCaches> {
140 &ARRAY_CACHES
141}
142
143pub(crate) fn get_or_create_cached_null_array(num_rows: usize) -> ArrayRef {
145 let cache = get_array_caches();
146 let mut caches = cache.lock().unwrap();
147 caches.null_cache.get_or_create(num_rows)
148}
149
150pub(crate) fn get_or_create_cached_key_array<K: ArrowDictionaryKeyType>(
152 num_rows: usize,
153 is_null: bool,
154) -> PrimitiveArray<K> {
155 let cache = get_array_caches();
156 let mut caches = cache.lock().unwrap();
157
158 match K::DATA_TYPE {
160 DataType::Int8 => {
161 let array = caches.cache_i8.get_or_create(num_rows, is_null);
162 let array_data = array.to_data();
164 PrimitiveArray::<K>::from(array_data)
165 }
166 DataType::Int16 => {
167 let array = caches.cache_i16.get_or_create(num_rows, is_null);
168 let array_data = array.to_data();
169 PrimitiveArray::<K>::from(array_data)
170 }
171 DataType::Int32 => {
172 let array = caches.cache_i32.get_or_create(num_rows, is_null);
173 let array_data = array.to_data();
174 PrimitiveArray::<K>::from(array_data)
175 }
176 DataType::Int64 => {
177 let array = caches.cache_i64.get_or_create(num_rows, is_null);
178 let array_data = array.to_data();
179 PrimitiveArray::<K>::from(array_data)
180 }
181 DataType::UInt8 => {
182 let array = caches.cache_u8.get_or_create(num_rows, is_null);
183 let array_data = array.to_data();
184 PrimitiveArray::<K>::from(array_data)
185 }
186 DataType::UInt16 => {
187 let array = caches.cache_u16.get_or_create(num_rows, is_null);
188 let array_data = array.to_data();
189 PrimitiveArray::<K>::from(array_data)
190 }
191 DataType::UInt32 => {
192 let array = caches.cache_u32.get_or_create(num_rows, is_null);
193 let array_data = array.to_data();
194 PrimitiveArray::<K>::from(array_data)
195 }
196 DataType::UInt64 => {
197 let array = caches.cache_u64.get_or_create(num_rows, is_null);
198 let array_data = array.to_data();
199 PrimitiveArray::<K>::from(array_data)
200 }
201 _ => {
202 let key_array: PrimitiveArray<K> = repeat_n(
204 if is_null {
205 None
206 } else {
207 Some(K::default_value())
208 },
209 num_rows,
210 )
211 .collect();
212 key_array
213 }
214 }
215}