datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::hash::Hash;
30use std::hash::Hasher;
31use std::iter::repeat_n;
32use std::mem::{size_of, size_of_val};
33use std::str::FromStr;
34use std::sync::Arc;
35
36use crate::cast::{
37    as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
38    as_date64_array, as_decimal128_array, as_decimal256_array, as_decimal32_array,
39    as_decimal64_array, as_dictionary_array, as_duration_microsecond_array,
40    as_duration_millisecond_array, as_duration_nanosecond_array,
41    as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
42    as_float16_array, as_float32_array, as_float64_array, as_int16_array, as_int32_array,
43    as_int64_array, as_int8_array, as_interval_dt_array, as_interval_mdn_array,
44    as_interval_ym_array, as_large_binary_array, as_large_list_array,
45    as_large_string_array, as_string_array, as_string_view_array,
46    as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
47    as_time64_nanosecond_array, as_timestamp_microsecond_array,
48    as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
49    as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array,
50    as_uint8_array, as_union_array,
51};
52use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
53use crate::format::DEFAULT_CAST_OPTIONS;
54use crate::hash_utils::create_hashes;
55use crate::utils::SingleRowListArrayBuilder;
56use crate::{_internal_datafusion_err, arrow_datafusion_err};
57use arrow::array::{
58    new_empty_array, new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp,
59    ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
60    Date64Array, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
61    DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
62    DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
63    FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
64    Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
65    IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
66    LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait,
67    PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray,
68    Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
69    Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
70    TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
71    UInt64Array, UInt8Array, UnionArray,
72};
73use arrow::buffer::{BooleanBuffer, ScalarBuffer};
74use arrow::compute::kernels::cast::{cast_with_options, CastOptions};
75use arrow::compute::kernels::numeric::{
76    add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
77};
78use arrow::datatypes::{
79    i256, validate_decimal_precision_and_scale, ArrowDictionaryKeyType, ArrowNativeType,
80    ArrowTimestampType, DataType, Date32Type, Decimal128Type, Decimal256Type,
81    Decimal32Type, Decimal64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
82    Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano,
83    IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit,
84    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
85    TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, UnionFields,
86    UnionMode, DECIMAL128_MAX_PRECISION,
87};
88use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
89use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
90use chrono::{Duration, NaiveDate};
91use half::f16;
92pub use struct_builder::ScalarStructBuilder;
93
94/// A dynamically typed, nullable single value.
95///
/// While an arrow [`Array`] stores one or more values of the same type, in a
97/// single column, a `ScalarValue` stores a single value of a single type, the
98/// equivalent of 1 row and one column.
99///
100/// ```text
101///  ┌────────┐
102///  │ value1 │
103///  │ value2 │                  ┌────────┐
104///  │ value3 │                  │ value2 │
105///  │  ...   │                  └────────┘
106///  │ valueN │
107///  └────────┘
108///
109///    Array                     ScalarValue
110///
111/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
113/// the same type
114/// ```
115///
116/// # Performance
117///
118/// In general, performance will be better using arrow [`Array`]s rather than
119/// [`ScalarValue`], as it is far more efficient to process multiple values at
120/// once (vectorized processing).
121///
122/// # Example
123/// ```
124/// # use datafusion_common::ScalarValue;
125/// // Create single scalar value for an Int32 value
126/// let s1 = ScalarValue::Int32(Some(10));
127///
128/// // You can also create values using the From impl:
129/// let s2 = ScalarValue::from(10i32);
130/// assert_eq!(s1, s2);
131/// ```
132///
133/// # Null Handling
134///
135/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
136/// "typed" in the sense that a null value in an [`Int32Array`] is different
137/// from a null value in a [`Float64Array`], and is different from the values in
138/// a [`NullArray`].
139///
140/// ```
141/// # fn main() -> datafusion_common::Result<()> {
142/// # use std::collections::hash_set::Difference;
143/// # use datafusion_common::ScalarValue;
144/// # use arrow::datatypes::DataType;
145/// // You can create a 'null' Int32 value directly:
146/// let s1 = ScalarValue::Int32(None);
147///
148/// // You can also create a null value for a given datatype:
149/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
150/// assert_eq!(s1, s2);
151///
152/// // Note that this is DIFFERENT than a `ScalarValue::Null`
153/// let s3 = ScalarValue::Null;
154/// assert_ne!(s1, s3);
155/// # Ok(())
156/// # }
157/// ```
158///
159/// # Nested Types
160///
161/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
162/// single element array of the corresponding type.
163///
164/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
165/// ```
166/// # use std::sync::Arc;
167/// # use arrow::datatypes::{DataType, Field};
168/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
169/// // Build a struct like: {a: 1, b: "foo"}
170/// let field_a = Field::new("a", DataType::Int32, false);
171/// let field_b = Field::new("b", DataType::Utf8, false);
172///
173/// let s1 = ScalarStructBuilder::new()
174///     .with_scalar(field_a, ScalarValue::from(1i32))
175///     .with_scalar(field_b, ScalarValue::from("foo"))
176///     .build();
177/// ```
178///
179/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
180/// ```
181/// # use std::sync::Arc;
182/// # use arrow::datatypes::{DataType, Field};
183/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
184/// // Build a struct representing a NULL value
185/// let fields = vec![
186///     Field::new("a", DataType::Int32, false),
187///     Field::new("b", DataType::Utf8, false),
188/// ];
189///
190/// let s1 = ScalarStructBuilder::new_null(fields);
191/// ```
192///
193/// ## Example: Creating [`ScalarValue::Struct`] directly
194/// ```
195/// # use std::sync::Arc;
196/// # use arrow::datatypes::{DataType, Field, Fields};
197/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
198/// # use datafusion_common::ScalarValue;
199/// // Build a struct like: {a: 1, b: "foo"}
200/// // Field description
201/// let fields = Fields::from(vec![
202///     Field::new("a", DataType::Int32, false),
203///     Field::new("b", DataType::Utf8, false),
204/// ]);
205/// // one row arrays for each field
206/// let arrays: Vec<ArrayRef> = vec![
207///     Arc::new(Int32Array::from(vec![1])),
208///     Arc::new(StringArray::from(vec!["foo"])),
209/// ];
210/// // no nulls for this array
211/// let nulls = None;
212/// let arr = StructArray::new(fields, arrays, nulls);
213///
214/// // Create a ScalarValue::Struct directly
215/// let s1 = ScalarValue::Struct(Arc::new(arr));
216/// ```
217///
218///
219/// # Further Reading
220/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
221/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
222/// for the definitive reference.
223///
224/// [`NullArray`]: arrow::array::NullArray
#[derive(Clone)]
pub enum ScalarValue {
    /// Represents `DataType::Null` (castable to/from any other type)
    Null,
    /// A true or false value
    Boolean(Option<bool>),
    /// 16-bit float
    Float16(Option<f16>),
    /// 32-bit float
    Float32(Option<f32>),
    /// 64-bit float
    Float64(Option<f64>),
    /// 32-bit decimal. Fields, in order: the `i32` used to represent the
    /// decimal, the precision, and the scale.
    Decimal32(Option<i32>, u8, i8),
    /// 64-bit decimal. Fields, in order: the `i64` used to represent the
    /// decimal, the precision, and the scale.
    Decimal64(Option<i64>, u8, i8),
    /// 128-bit decimal. Fields, in order: the `i128` used to represent the
    /// decimal, the precision, and the scale.
    Decimal128(Option<i128>, u8, i8),
    /// 256-bit decimal. Fields, in order: the `i256` used to represent the
    /// decimal, the precision, and the scale.
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit int
    Int8(Option<i8>),
    /// Signed 16-bit int
    Int16(Option<i16>),
    /// Signed 32-bit int
    Int32(Option<i32>),
    /// Signed 64-bit int
    Int64(Option<i64>),
    /// Unsigned 8-bit int
    UInt8(Option<u8>),
    /// Unsigned 16-bit int
    UInt16(Option<u16>),
    /// Unsigned 32-bit int
    UInt32(Option<u32>),
    /// Unsigned 64-bit int
    UInt64(Option<u64>),
    /// UTF-8 encoded string.
    Utf8(Option<String>),
    /// UTF-8 encoded string, but from view types.
    Utf8View(Option<String>),
    /// UTF-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// Binary
    Binary(Option<Vec<u8>>),
    /// Binary, but from view types.
    BinaryView(Option<Vec<u8>>),
    /// Fixed size binary
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Large list scalar.
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
    /// [`ScalarValue`] for examples of how to create instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
    Map(Arc<MapArray>),
    /// Date stored as a signed 32-bit int: days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64-bit int: milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32-bit int: seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32-bit int: milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64-bit int: microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64-bit int: nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds),
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
335
336impl Hash for Fl<f16> {
337    fn hash<H: Hasher>(&self, state: &mut H) {
338        self.0.to_bits().hash(state);
339    }
340}
341
342// manual implementation of `PartialEq`
343impl PartialEq for ScalarValue {
344    fn eq(&self, other: &Self) -> bool {
345        use ScalarValue::*;
346        // This purposely doesn't have a catch-all "(_, _)" so that
347        // any newly added enum variant will require editing this list
348        // or else face a compile error
349        match (self, other) {
350            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
351                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
352            }
353            (Decimal32(_, _, _), _) => false,
354            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
355                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
356            }
357            (Decimal64(_, _, _), _) => false,
358            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
359                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
360            }
361            (Decimal128(_, _, _), _) => false,
362            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
363                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
364            }
365            (Decimal256(_, _, _), _) => false,
366            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
367            (Boolean(_), _) => false,
368            (Float32(v1), Float32(v2)) => match (v1, v2) {
369                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
370                _ => v1.eq(v2),
371            },
372            (Float16(v1), Float16(v2)) => match (v1, v2) {
373                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
374                _ => v1.eq(v2),
375            },
376            (Float32(_), _) => false,
377            (Float16(_), _) => false,
378            (Float64(v1), Float64(v2)) => match (v1, v2) {
379                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
380                _ => v1.eq(v2),
381            },
382            (Float64(_), _) => false,
383            (Int8(v1), Int8(v2)) => v1.eq(v2),
384            (Int8(_), _) => false,
385            (Int16(v1), Int16(v2)) => v1.eq(v2),
386            (Int16(_), _) => false,
387            (Int32(v1), Int32(v2)) => v1.eq(v2),
388            (Int32(_), _) => false,
389            (Int64(v1), Int64(v2)) => v1.eq(v2),
390            (Int64(_), _) => false,
391            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
392            (UInt8(_), _) => false,
393            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
394            (UInt16(_), _) => false,
395            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
396            (UInt32(_), _) => false,
397            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
398            (UInt64(_), _) => false,
399            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
400            (Utf8(_), _) => false,
401            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
402            (Utf8View(_), _) => false,
403            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
404            (LargeUtf8(_), _) => false,
405            (Binary(v1), Binary(v2)) => v1.eq(v2),
406            (Binary(_), _) => false,
407            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
408            (BinaryView(_), _) => false,
409            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
410            (FixedSizeBinary(_, _), _) => false,
411            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
412            (LargeBinary(_), _) => false,
413            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
414            (FixedSizeList(_), _) => false,
415            (List(v1), List(v2)) => v1.eq(v2),
416            (List(_), _) => false,
417            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
418            (LargeList(_), _) => false,
419            (Struct(v1), Struct(v2)) => v1.eq(v2),
420            (Struct(_), _) => false,
421            (Map(v1), Map(v2)) => v1.eq(v2),
422            (Map(_), _) => false,
423            (Date32(v1), Date32(v2)) => v1.eq(v2),
424            (Date32(_), _) => false,
425            (Date64(v1), Date64(v2)) => v1.eq(v2),
426            (Date64(_), _) => false,
427            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
428            (Time32Second(_), _) => false,
429            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
430            (Time32Millisecond(_), _) => false,
431            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
432            (Time64Microsecond(_), _) => false,
433            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
434            (Time64Nanosecond(_), _) => false,
435            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
436            (TimestampSecond(_, _), _) => false,
437            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
438            (TimestampMillisecond(_, _), _) => false,
439            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
440            (TimestampMicrosecond(_, _), _) => false,
441            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
442            (TimestampNanosecond(_, _), _) => false,
443            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
444            (DurationSecond(_), _) => false,
445            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
446            (DurationMillisecond(_), _) => false,
447            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
448            (DurationMicrosecond(_), _) => false,
449            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
450            (DurationNanosecond(_), _) => false,
451            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
452            (IntervalYearMonth(_), _) => false,
453            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
454            (IntervalDayTime(_), _) => false,
455            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
456            (IntervalMonthDayNano(_), _) => false,
457            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
458                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
459            }
460            (Union(_, _, _), _) => false,
461            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
462            (Dictionary(_, _), _) => false,
463            (Null, Null) => true,
464            (Null, _) => false,
465        }
466    }
467}
468
469// manual implementation of `PartialOrd`
470impl PartialOrd for ScalarValue {
471    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
472        use ScalarValue::*;
473        // This purposely doesn't have a catch-all "(_, _)" so that
474        // any newly added enum variant will require editing this list
475        // or else face a compile error
476        match (self, other) {
477            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
478                if p1.eq(p2) && s1.eq(s2) {
479                    v1.partial_cmp(v2)
480                } else {
481                    // Two decimal values can be compared if they have the same precision and scale.
482                    None
483                }
484            }
485            (Decimal32(_, _, _), _) => None,
486            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
487                if p1.eq(p2) && s1.eq(s2) {
488                    v1.partial_cmp(v2)
489                } else {
490                    // Two decimal values can be compared if they have the same precision and scale.
491                    None
492                }
493            }
494            (Decimal64(_, _, _), _) => None,
495            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
496                if p1.eq(p2) && s1.eq(s2) {
497                    v1.partial_cmp(v2)
498                } else {
499                    // Two decimal values can be compared if they have the same precision and scale.
500                    None
501                }
502            }
503            (Decimal128(_, _, _), _) => None,
504            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
505                if p1.eq(p2) && s1.eq(s2) {
506                    v1.partial_cmp(v2)
507                } else {
508                    // Two decimal values can be compared if they have the same precision and scale.
509                    None
510                }
511            }
512            (Decimal256(_, _, _), _) => None,
513            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
514            (Boolean(_), _) => None,
515            (Float32(v1), Float32(v2)) => match (v1, v2) {
516                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
517                _ => v1.partial_cmp(v2),
518            },
519            (Float16(v1), Float16(v2)) => match (v1, v2) {
520                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
521                _ => v1.partial_cmp(v2),
522            },
523            (Float32(_), _) => None,
524            (Float16(_), _) => None,
525            (Float64(v1), Float64(v2)) => match (v1, v2) {
526                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
527                _ => v1.partial_cmp(v2),
528            },
529            (Float64(_), _) => None,
530            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
531            (Int8(_), _) => None,
532            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
533            (Int16(_), _) => None,
534            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
535            (Int32(_), _) => None,
536            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
537            (Int64(_), _) => None,
538            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
539            (UInt8(_), _) => None,
540            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
541            (UInt16(_), _) => None,
542            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
543            (UInt32(_), _) => None,
544            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
545            (UInt64(_), _) => None,
546            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
547            (Utf8(_), _) => None,
548            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
549            (LargeUtf8(_), _) => None,
550            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
551            (Utf8View(_), _) => None,
552            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
553            (Binary(_), _) => None,
554            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
555            (BinaryView(_), _) => None,
556            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
557            (FixedSizeBinary(_, _), _) => None,
558            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
559            (LargeBinary(_), _) => None,
560            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
561            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
562            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
563                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
564            }
565            (LargeList(arr1), LargeList(arr2)) => {
566                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
567            }
568            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
569            (Struct(struct_arr1), Struct(struct_arr2)) => {
570                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
571            }
572            (Struct(_), _) => None,
573            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
574            (Map(_), _) => None,
575            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
576            (Date32(_), _) => None,
577            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
578            (Date64(_), _) => None,
579            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
580            (Time32Second(_), _) => None,
581            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
582            (Time32Millisecond(_), _) => None,
583            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
584            (Time64Microsecond(_), _) => None,
585            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
586            (Time64Nanosecond(_), _) => None,
587            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
588            (TimestampSecond(_, _), _) => None,
589            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
590                v1.partial_cmp(v2)
591            }
592            (TimestampMillisecond(_, _), _) => None,
593            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
594                v1.partial_cmp(v2)
595            }
596            (TimestampMicrosecond(_, _), _) => None,
597            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
598                v1.partial_cmp(v2)
599            }
600            (TimestampNanosecond(_, _), _) => None,
601            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
602            (IntervalYearMonth(_), _) => None,
603            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
604            (IntervalDayTime(_), _) => None,
605            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
606            (IntervalMonthDayNano(_), _) => None,
607            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
608            (DurationSecond(_), _) => None,
609            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
610            (DurationMillisecond(_), _) => None,
611            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
612            (DurationMicrosecond(_), _) => None,
613            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
614            (DurationNanosecond(_), _) => None,
615            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
616                if t1.eq(t2) && m1.eq(m2) {
617                    v1.partial_cmp(v2)
618                } else {
619                    None
620                }
621            }
622            (Union(_, _, _), _) => None,
623            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
624                // Don't compare if the key types don't match (it is effectively a different datatype)
625                if k1 == k2 {
626                    v1.partial_cmp(v2)
627                } else {
628                    None
629                }
630            }
631            (Dictionary(_, _), _) => None,
632            (Null, Null) => Some(Ordering::Equal),
633            (Null, _) => None,
634        }
635    }
636}
637
638/// List/LargeList/FixedSizeList scalars always have a single element
639/// array. This function returns that array
640fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
641    assert_eq!(arr.len(), 1);
642    if let Some(arr) = arr.as_list_opt::<i32>() {
643        arr.value(0)
644    } else if let Some(arr) = arr.as_list_opt::<i64>() {
645        arr.value(0)
646    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
647        arr.value(0)
648    } else {
649        unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
650    }
651}
652
653/// Compares two List/LargeList/FixedSizeList scalars
654fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
655    if arr1.data_type() != arr2.data_type() {
656        return None;
657    }
658    let arr1 = first_array_for_list(arr1);
659    let arr2 = first_array_for_list(arr2);
660
661    let min_length = arr1.len().min(arr2.len());
662    let arr1_trimmed = arr1.slice(0, min_length);
663    let arr2_trimmed = arr2.slice(0, min_length);
664
665    let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
666    let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
667
668    for j in 0..lt_res.len() {
669        // In Postgres, NULL values in lists are always considered to be greater than non-NULL values:
670        //
671        // $ SELECT ARRAY[NULL]::integer[] > ARRAY[1]
672        // true
673        //
674        // These next two if statements are introduced for replicating Postgres behavior, as
675        // arrow::compute does not account for this.
676        if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
677            return Some(Ordering::Greater);
678        }
679        if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
680            return Some(Ordering::Less);
681        }
682
683        if lt_res.is_valid(j) && lt_res.value(j) {
684            return Some(Ordering::Less);
685        }
686        if eq_res.is_valid(j) && !eq_res.value(j) {
687            return Some(Ordering::Greater);
688        }
689    }
690
691    Some(arr1.len().cmp(&arr2.len()))
692}
693
694fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
695    for i in 0..array.num_columns() {
696        let column = array.column(i);
697        if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
698            // If it's a nested struct, recursively expand
699            flatten(nested_struct, columns);
700        } else {
701            // If it's a primitive type, add directly
702            columns.push(column);
703        }
704    }
705}
706
707pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
708    if s1.len() != s2.len() {
709        return None;
710    }
711
712    if s1.data_type() != s2.data_type() {
713        return None;
714    }
715
716    let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
717    let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
718
719    flatten(s1, &mut expanded_columns1);
720    flatten(s2, &mut expanded_columns2);
721
722    for col_index in 0..expanded_columns1.len() {
723        let arr1 = expanded_columns1[col_index];
724        let arr2 = expanded_columns2[col_index];
725
726        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
727        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
728
729        for j in 0..lt_res.len() {
730            if lt_res.is_valid(j) && lt_res.value(j) {
731                return Some(Ordering::Less);
732            }
733            if eq_res.is_valid(j) && !eq_res.value(j) {
734                return Some(Ordering::Greater);
735            }
736        }
737    }
738    Some(Ordering::Equal)
739}
740
741fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
742    if m1.len() != m2.len() {
743        return None;
744    }
745
746    if m1.data_type() != m2.data_type() {
747        return None;
748    }
749
750    for col_index in 0..m1.len() {
751        let arr1 = m1.entries().column(col_index);
752        let arr2 = m2.entries().column(col_index);
753
754        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
755        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
756
757        for j in 0..lt_res.len() {
758            if lt_res.is_valid(j) && lt_res.value(j) {
759                return Some(Ordering::Less);
760            }
761            if eq_res.is_valid(j) && !eq_res.value(j) {
762                return Some(Ordering::Greater);
763            }
764        }
765    }
766    Some(Ordering::Equal)
767}
768
// Marker impl with no methods: it declares the `PartialEq` relation of
// `ScalarValue` to be a full equivalence relation.
// NOTE(review): the `PartialEq` impl is outside this view — confirm it is
// reflexive for float payloads (NaN) so this promise holds.
impl Eq for ScalarValue {}
770
// Newtype wrapper over f32/f64 that exists only for hashing: the float types
// do not implement `std::hash::Hash`, so the impls generated below feed the
// raw bytes of the value to the hasher instead.
struct Fl<T>(T);

// Generates `impl Hash for Fl<$float>` for every `($float, $bits)` pair, where
// `$bits` is the unsigned integer type with the same width as `$float`.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(
            impl std::hash::Hash for Fl<$float> {
                #[inline]
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                    // Reinterpret the float as its same-width integer, then
                    // write that integer's native-endian bytes
                    let raw = <$bits>::from_ne_bytes(self.0.to_ne_bytes());
                    state.write(&raw.to_ne_bytes())
                }
            }
        )+
    };
}

// NOTE(review): the `Hash` impl for `ScalarValue` also wraps `Float16` values
// in `Fl`, but no `Fl<f16>` impl is generated here — confirm one exists
// elsewhere.
hash_float_value!((f64, u64), (f32, u32));
786
787// manual implementation of `Hash`
788//
789// # Panics
790//
791// Panics if there is an error when creating hash values for rows
792impl Hash for ScalarValue {
793    fn hash<H: Hasher>(&self, state: &mut H) {
794        use ScalarValue::*;
795        match self {
796            Decimal32(v, p, s) => {
797                v.hash(state);
798                p.hash(state);
799                s.hash(state)
800            }
801            Decimal64(v, p, s) => {
802                v.hash(state);
803                p.hash(state);
804                s.hash(state)
805            }
806            Decimal128(v, p, s) => {
807                v.hash(state);
808                p.hash(state);
809                s.hash(state)
810            }
811            Decimal256(v, p, s) => {
812                v.hash(state);
813                p.hash(state);
814                s.hash(state)
815            }
816            Boolean(v) => v.hash(state),
817            Float16(v) => v.map(Fl).hash(state),
818            Float32(v) => v.map(Fl).hash(state),
819            Float64(v) => v.map(Fl).hash(state),
820            Int8(v) => v.hash(state),
821            Int16(v) => v.hash(state),
822            Int32(v) => v.hash(state),
823            Int64(v) => v.hash(state),
824            UInt8(v) => v.hash(state),
825            UInt16(v) => v.hash(state),
826            UInt32(v) => v.hash(state),
827            UInt64(v) => v.hash(state),
828            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
829            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
830                v.hash(state)
831            }
832            List(arr) => {
833                hash_nested_array(arr.to_owned() as ArrayRef, state);
834            }
835            LargeList(arr) => {
836                hash_nested_array(arr.to_owned() as ArrayRef, state);
837            }
838            FixedSizeList(arr) => {
839                hash_nested_array(arr.to_owned() as ArrayRef, state);
840            }
841            Struct(arr) => {
842                hash_nested_array(arr.to_owned() as ArrayRef, state);
843            }
844            Map(arr) => {
845                hash_nested_array(arr.to_owned() as ArrayRef, state);
846            }
847            Date32(v) => v.hash(state),
848            Date64(v) => v.hash(state),
849            Time32Second(v) => v.hash(state),
850            Time32Millisecond(v) => v.hash(state),
851            Time64Microsecond(v) => v.hash(state),
852            Time64Nanosecond(v) => v.hash(state),
853            TimestampSecond(v, _) => v.hash(state),
854            TimestampMillisecond(v, _) => v.hash(state),
855            TimestampMicrosecond(v, _) => v.hash(state),
856            TimestampNanosecond(v, _) => v.hash(state),
857            DurationSecond(v) => v.hash(state),
858            DurationMillisecond(v) => v.hash(state),
859            DurationMicrosecond(v) => v.hash(state),
860            DurationNanosecond(v) => v.hash(state),
861            IntervalYearMonth(v) => v.hash(state),
862            IntervalDayTime(v) => v.hash(state),
863            IntervalMonthDayNano(v) => v.hash(state),
864            Union(v, t, m) => {
865                v.hash(state);
866                t.hash(state);
867                m.hash(state);
868            }
869            Dictionary(k, v) => {
870                k.hash(state);
871                v.hash(state);
872            }
873            // stable hash for Null value
874            Null => 1.hash(state),
875        }
876    }
877}
878
879fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
880    let len = arr.len();
881    let arrays = vec![arr];
882    let hashes_buffer = &mut vec![0; len];
883    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
884    let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
885    // Hash back to std::hash::Hasher
886    hashes.hash(state);
887}
888
889/// Return a reference to the values array and the index into it for a
890/// dictionary array
891///
892/// # Errors
893///
894/// Errors if the array cannot be downcasted to DictionaryArray
895#[inline]
896pub fn get_dict_value<K: ArrowDictionaryKeyType>(
897    array: &dyn Array,
898    index: usize,
899) -> Result<(&ArrayRef, Option<usize>)> {
900    let dict_array = as_dictionary_array::<K>(array)?;
901    Ok((dict_array.values(), dict_array.key(index)))
902}
903
904/// Create a dictionary array representing `value` repeated `size`
905/// times
906fn dict_from_scalar<K: ArrowDictionaryKeyType>(
907    value: &ScalarValue,
908    size: usize,
909) -> Result<ArrayRef> {
910    // values array is one element long (the value)
911    let values_array = value.to_array_of_size(1)?;
912
913    // Create a key array with `size` elements, each of 0
914    // Use cache to avoid repeated allocations for the same size
915    let key_array: PrimitiveArray<K> =
916        get_or_create_cached_key_array::<K>(size, value.is_null());
917
918    // create a new DictionaryArray
919    //
920    // Note: this path could be made faster by using the ArrayData
921    // APIs and skipping validation, if it every comes up in
922    // performance traces.
923    Ok(Arc::new(
924        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
925    ))
926}
927
928/// Create a `DictionaryArray` from the provided values array.
929///
930/// Each element gets a unique key (`0..N-1`), without deduplication.
931/// Useful for wrapping arrays in dictionary form.
932///
933/// # Input
934/// ["alice", "bob", "alice", null, "carol"]
935///
936/// # Output
937/// `DictionaryArray<Int32>`
938/// {
939///   keys:   [0, 1, 2, 3, 4],
940///   values: ["alice", "bob", "alice", null, "carol"]
941/// }
942pub fn dict_from_values<K: ArrowDictionaryKeyType>(
943    values_array: ArrayRef,
944) -> Result<ArrayRef> {
945    // Create a key array with `size` elements of 0..array_len for all
946    // non-null value elements
947    let key_array: PrimitiveArray<K> = (0..values_array.len())
948        .map(|index| {
949            if values_array.is_valid(index) {
950                let native_index = K::Native::from_usize(index).ok_or_else(|| {
951                    _internal_datafusion_err!(
952                        "Can not create index of type {} from value {index}",
953                        K::DATA_TYPE
954                    )
955                })?;
956                Ok(Some(native_index))
957            } else {
958                Ok(None)
959            }
960        })
961        .collect::<Result<Vec<_>>>()?
962        .into_iter()
963        .collect();
964
965    // create a new DictionaryArray
966    //
967    // Note: this path could be made faster by using the ArrayData
968    // APIs and skipping validation, if it every comes up in
969    // performance traces.
970    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
971    Ok(Arc::new(dict_array))
972}
973
// Reads element `$index` of `$array` as the timezone-carrying variant
// `ScalarValue::$SCALAR`.
//
// `$array_cast` downcasts the array (its error is propagated with `?`); a
// null slot becomes `None`, anything else is converted with `.into()`. The
// timezone `$TZ` is cloned into the scalar.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let typed = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if typed.is_null($index) {
                None
            } else {
                Some(typed.value($index).into())
            },
            $TZ.clone(),
        ))
    }};
}
986
// Reads element `$index` of `$array` as `ScalarValue::$SCALAR`.
//
// `$array_cast` downcasts the array (its error is propagated with `?`); a
// null slot becomes `None`, anything else is converted with `.into()`.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let typed = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if typed.is_null($index) {
                None
            } else {
                Some(typed.value($index).into())
            },
        ))
    }};
}
998
// Builds an array with `$SIZE` entries from an optional scalar value.
//
// * `Some(value)`: `$ARRAY_TYPE::from_value(*value, $SIZE)` wrapped in an
//   `Arc` (presumably `value` repeated `$SIZE` times — confirm against the
//   `from_value` of the array type in use). `value` is dereferenced, so
//   `$EXPR` is expected to hand out a reference to the payload.
// * `None`: `new_null_array` of the matching `DataType` with `$SIZE` entries.
//
// The first form is for data types without a parameter
// (`DataType::$DATA_TYPE`), the second for data types that carry one
// (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
        }
    }};
}
1013
// Timestamp flavour of `build_array_from_option`.
//
// * `Some(value)`: `$ARRAY_TYPE::from_value(*value, $SIZE)` with the timezone
//   `$TZ` attached through `with_timezone_opt`, wrapped in an `Arc`.
// * `None`: `new_null_array` of `DataType::Timestamp($TIME_UNIT, $TZ)` with
//   `$SIZE` entries.
//
// `$TZ` is expanded in both arms, but only one arm runs per evaluation.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            Some(value) => {
                Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
            }
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
        }
    };
}
1024
// Checks whether element `$index` of `$array` equals the optional value
// `$VALUE`, evaluating to `Result<bool, DataFusionError>`.
//
// `$array_cast` downcasts the array; its error is propagated with `?`.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        Ok::<bool, DataFusionError>(match $VALUE {
            // A present value only matches a non-null slot holding the same value
            Some(val) => is_valid && &array.value($index) == val,
            // An absent value only matches a null slot
            None => !is_valid,
        })
    }};
}
1035
1036impl ScalarValue {
1037    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
1038    ///
1039    /// # Panics
1040    ///
1041    /// Panics if d is not compatible with T
1042    pub fn new_primitive<T: ArrowPrimitiveType>(
1043        a: Option<T::Native>,
1044        d: &DataType,
1045    ) -> Result<Self> {
1046        match a {
1047            None => d.try_into(),
1048            Some(v) => {
1049                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1050                    .with_data_type(d.clone());
1051                Self::try_from_array(&array, 0)
1052            }
1053        }
1054    }
1055
1056    /// Create a decimal Scalar from value/precision and scale.
1057    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1058        // make sure the precision and scale is valid
1059        if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1060            return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1061        }
1062        _internal_err!(
1063            "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1064        )
1065    }
1066
1067    /// Create a Null instance of ScalarValue for this datatype
1068    ///
1069    /// Example
1070    /// ```
1071    /// use arrow::datatypes::DataType;
1072    /// use datafusion_common::ScalarValue;
1073    ///
1074    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
1075    /// assert_eq!(scalar.is_null(), true);
1076    /// assert_eq!(scalar.data_type(), DataType::Int32);
1077    /// ```
1078    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1079        Ok(match data_type {
1080            DataType::Boolean => ScalarValue::Boolean(None),
1081            DataType::Float16 => ScalarValue::Float16(None),
1082            DataType::Float64 => ScalarValue::Float64(None),
1083            DataType::Float32 => ScalarValue::Float32(None),
1084            DataType::Int8 => ScalarValue::Int8(None),
1085            DataType::Int16 => ScalarValue::Int16(None),
1086            DataType::Int32 => ScalarValue::Int32(None),
1087            DataType::Int64 => ScalarValue::Int64(None),
1088            DataType::UInt8 => ScalarValue::UInt8(None),
1089            DataType::UInt16 => ScalarValue::UInt16(None),
1090            DataType::UInt32 => ScalarValue::UInt32(None),
1091            DataType::UInt64 => ScalarValue::UInt64(None),
1092            DataType::Decimal32(precision, scale) => {
1093                ScalarValue::Decimal32(None, *precision, *scale)
1094            }
1095            DataType::Decimal64(precision, scale) => {
1096                ScalarValue::Decimal64(None, *precision, *scale)
1097            }
1098            DataType::Decimal128(precision, scale) => {
1099                ScalarValue::Decimal128(None, *precision, *scale)
1100            }
1101            DataType::Decimal256(precision, scale) => {
1102                ScalarValue::Decimal256(None, *precision, *scale)
1103            }
1104            DataType::Utf8 => ScalarValue::Utf8(None),
1105            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1106            DataType::Utf8View => ScalarValue::Utf8View(None),
1107            DataType::Binary => ScalarValue::Binary(None),
1108            DataType::BinaryView => ScalarValue::BinaryView(None),
1109            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1110            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1111            DataType::Date32 => ScalarValue::Date32(None),
1112            DataType::Date64 => ScalarValue::Date64(None),
1113            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1114            DataType::Time32(TimeUnit::Millisecond) => {
1115                ScalarValue::Time32Millisecond(None)
1116            }
1117            DataType::Time64(TimeUnit::Microsecond) => {
1118                ScalarValue::Time64Microsecond(None)
1119            }
1120            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1121            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1122                ScalarValue::TimestampSecond(None, tz_opt.clone())
1123            }
1124            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1125                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1126            }
1127            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1128                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1129            }
1130            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1131                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1132            }
1133            DataType::Interval(IntervalUnit::YearMonth) => {
1134                ScalarValue::IntervalYearMonth(None)
1135            }
1136            DataType::Interval(IntervalUnit::DayTime) => {
1137                ScalarValue::IntervalDayTime(None)
1138            }
1139            DataType::Interval(IntervalUnit::MonthDayNano) => {
1140                ScalarValue::IntervalMonthDayNano(None)
1141            }
1142            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1143            DataType::Duration(TimeUnit::Millisecond) => {
1144                ScalarValue::DurationMillisecond(None)
1145            }
1146            DataType::Duration(TimeUnit::Microsecond) => {
1147                ScalarValue::DurationMicrosecond(None)
1148            }
1149            DataType::Duration(TimeUnit::Nanosecond) => {
1150                ScalarValue::DurationNanosecond(None)
1151            }
1152            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1153                index_type.clone(),
1154                Box::new(value_type.as_ref().try_into()?),
1155            ),
1156            // `ScalaValue::List` contains single element `ListArray`.
1157            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1158                GenericListArray::new_null(Arc::clone(field_ref), 1),
1159            )),
1160            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1161            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1162                GenericListArray::new_null(Arc::clone(field_ref), 1),
1163            )),
1164            // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`.
1165            DataType::FixedSizeList(field_ref, fixed_length) => {
1166                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1167                    Arc::clone(field_ref),
1168                    *fixed_length,
1169                    1,
1170                )))
1171            }
1172            DataType::Struct(fields) => ScalarValue::Struct(
1173                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1174                    .as_struct()
1175                    .to_owned()
1176                    .into(),
1177            ),
1178            DataType::Map(fields, sorted) => ScalarValue::Map(
1179                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1180                    .as_map()
1181                    .to_owned()
1182                    .into(),
1183            ),
1184            DataType::Union(fields, mode) => {
1185                ScalarValue::Union(None, fields.clone(), *mode)
1186            }
1187            DataType::Null => ScalarValue::Null,
1188            _ => {
1189                return _not_impl_err!(
1190                    "Can't create a null scalar from data_type \"{data_type}\""
1191                );
1192            }
1193        })
1194    }
1195
1196    /// Returns a [`ScalarValue::Utf8`] representing `val`
1197    pub fn new_utf8(val: impl Into<String>) -> Self {
1198        ScalarValue::from(val.into())
1199    }
1200
1201    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1202    pub fn new_utf8view(val: impl Into<String>) -> Self {
1203        ScalarValue::Utf8View(Some(val.into()))
1204    }
1205
1206    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1207    /// `years` years and `months` months
1208    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1209        let val = IntervalYearMonthType::make_value(years, months);
1210        ScalarValue::IntervalYearMonth(Some(val))
1211    }
1212
1213    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1214    /// `days` days and `millis` milliseconds
1215    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1216        let val = IntervalDayTimeType::make_value(days, millis);
1217        Self::IntervalDayTime(Some(val))
1218    }
1219
1220    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1221    /// `months` months and `days` days, and `nanos` nanoseconds
1222    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1223        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1224        ScalarValue::IntervalMonthDayNano(Some(val))
1225    }
1226
1227    /// Returns a [`ScalarValue`] representing
1228    /// `value` and `tz_opt` timezone
1229    pub fn new_timestamp<T: ArrowTimestampType>(
1230        value: Option<i64>,
1231        tz_opt: Option<Arc<str>>,
1232    ) -> Self {
1233        match T::UNIT {
1234            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1235            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1236            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1237            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1238        }
1239    }
1240
1241    /// Returns a [`ScalarValue`] representing PI
1242    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1243        match datatype {
1244            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1245            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1246            _ => _internal_err!("PI is not supported for data type: {}", datatype),
1247        }
1248    }
1249
1250    /// Returns a [`ScalarValue`] representing PI's upper bound
1251    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1252        match datatype {
1253            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1254            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1255            _ => {
1256                _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1257            }
1258        }
1259    }
1260
1261    /// Returns a [`ScalarValue`] representing -PI's lower bound
1262    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1263        match datatype {
1264            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1265            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1266            _ => {
1267                _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1268            }
1269        }
1270    }
1271
1272    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1273    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1274        match datatype {
1275            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1276            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1277            _ => {
1278                _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1279            }
1280        }
1281    }
1282
1283    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1284    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1285        match datatype {
1286            DataType::Float32 => {
1287                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1288            }
1289            DataType::Float64 => {
1290                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1291            }
1292            _ => {
1293                _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1294            }
1295        }
1296    }
1297
1298    /// Returns a [`ScalarValue`] representing -PI
1299    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1300        match datatype {
1301            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1302            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1303            _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1304        }
1305    }
1306
1307    /// Returns a [`ScalarValue`] representing PI/2
1308    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1309        match datatype {
1310            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1311            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1312            _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1313        }
1314    }
1315
1316    /// Returns a [`ScalarValue`] representing -PI/2
1317    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1318        match datatype {
1319            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1320            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1321            _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1322        }
1323    }
1324
1325    /// Returns a [`ScalarValue`] representing infinity
1326    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1327        match datatype {
1328            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1329            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1330            _ => {
1331                _internal_err!("Infinity is not supported for data type: {}", datatype)
1332            }
1333        }
1334    }
1335
1336    /// Returns a [`ScalarValue`] representing negative infinity
1337    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1338        match datatype {
1339            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1340            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1341            _ => {
1342                _internal_err!(
1343                    "Negative Infinity is not supported for data type: {}",
1344                    datatype
1345                )
1346            }
1347        }
1348    }
1349
1350    /// Create a zero value in the given type.
1351    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1352        Ok(match datatype {
1353            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1354            DataType::Int8 => ScalarValue::Int8(Some(0)),
1355            DataType::Int16 => ScalarValue::Int16(Some(0)),
1356            DataType::Int32 => ScalarValue::Int32(Some(0)),
1357            DataType::Int64 => ScalarValue::Int64(Some(0)),
1358            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1359            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1360            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1361            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1362            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1363            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1364            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1365            DataType::Decimal32(precision, scale) => {
1366                ScalarValue::Decimal32(Some(0), *precision, *scale)
1367            }
1368            DataType::Decimal64(precision, scale) => {
1369                ScalarValue::Decimal64(Some(0), *precision, *scale)
1370            }
1371            DataType::Decimal128(precision, scale) => {
1372                ScalarValue::Decimal128(Some(0), *precision, *scale)
1373            }
1374            DataType::Decimal256(precision, scale) => {
1375                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1376            }
1377            DataType::Timestamp(TimeUnit::Second, tz) => {
1378                ScalarValue::TimestampSecond(Some(0), tz.clone())
1379            }
1380            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1381                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1382            }
1383            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1384                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1385            }
1386            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1387                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1388            }
1389            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1390            DataType::Time32(TimeUnit::Millisecond) => {
1391                ScalarValue::Time32Millisecond(Some(0))
1392            }
1393            DataType::Time64(TimeUnit::Microsecond) => {
1394                ScalarValue::Time64Microsecond(Some(0))
1395            }
1396            DataType::Time64(TimeUnit::Nanosecond) => {
1397                ScalarValue::Time64Nanosecond(Some(0))
1398            }
1399            DataType::Interval(IntervalUnit::YearMonth) => {
1400                ScalarValue::IntervalYearMonth(Some(0))
1401            }
1402            DataType::Interval(IntervalUnit::DayTime) => {
1403                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1404            }
1405            DataType::Interval(IntervalUnit::MonthDayNano) => {
1406                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1407            }
1408            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1409            DataType::Duration(TimeUnit::Millisecond) => {
1410                ScalarValue::DurationMillisecond(Some(0))
1411            }
1412            DataType::Duration(TimeUnit::Microsecond) => {
1413                ScalarValue::DurationMicrosecond(Some(0))
1414            }
1415            DataType::Duration(TimeUnit::Nanosecond) => {
1416                ScalarValue::DurationNanosecond(Some(0))
1417            }
1418            DataType::Date32 => ScalarValue::Date32(Some(0)),
1419            DataType::Date64 => ScalarValue::Date64(Some(0)),
1420            _ => {
1421                return _not_impl_err!(
1422                    "Can't create a zero scalar from data_type \"{datatype}\""
1423                );
1424            }
1425        })
1426    }
1427
1428    /// Returns a default value for the given `DataType`.
1429    ///
1430    /// This function is useful when you need to initialize a column with
1431    /// non-null values in a DataFrame or when you need a "zero" value
1432    /// for a specific data type.
1433    ///
1434    /// # Default Values
1435    ///
1436    /// - **Numeric types**: Returns zero (via [`new_zero`])
1437    /// - **String types**: Returns empty string (`""`)
1438    /// - **Binary types**: Returns empty byte array
1439    /// - **Temporal types**: Returns zero/epoch value
1440    /// - **List types**: Returns empty list
1441    /// - **Struct types**: Returns struct with all fields set to their defaults
1442    /// - **Dictionary types**: Returns dictionary with default value
1443    /// - **Map types**: Returns empty map
1444    /// - **Union types**: Returns first variant with default value
1445    ///
1446    /// # Errors
1447    ///
1448    /// Returns an error for data types that don't have a clear default value
1449    /// or are not yet supported (e.g., `RunEndEncoded`).
1450    ///
1451    /// [`new_zero`]: Self::new_zero
1452    pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1453        match datatype {
1454            // Null type
1455            DataType::Null => Ok(ScalarValue::Null),
1456
1457            // Numeric types
1458            DataType::Boolean
1459            | DataType::Int8
1460            | DataType::Int16
1461            | DataType::Int32
1462            | DataType::Int64
1463            | DataType::UInt8
1464            | DataType::UInt16
1465            | DataType::UInt32
1466            | DataType::UInt64
1467            | DataType::Float16
1468            | DataType::Float32
1469            | DataType::Float64
1470            | DataType::Decimal128(_, _)
1471            | DataType::Decimal256(_, _)
1472            | DataType::Timestamp(_, _)
1473            | DataType::Time32(_)
1474            | DataType::Time64(_)
1475            | DataType::Interval(_)
1476            | DataType::Duration(_)
1477            | DataType::Date32
1478            | DataType::Date64 => ScalarValue::new_zero(datatype),
1479
1480            // String types
1481            DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1482            DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1483            DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1484
1485            // Binary types
1486            DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1487            DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1488            DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1489
1490            // Fixed-size binary
1491            DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1492                *size,
1493                Some(vec![0; *size as usize]),
1494            )),
1495
1496            // List types
1497            DataType::List(field) => {
1498                let list =
1499                    ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1500                Ok(ScalarValue::List(list))
1501            }
1502            DataType::FixedSizeList(field, _size) => {
1503                let empty_arr = new_empty_array(field.data_type());
1504                let values = Arc::new(
1505                    SingleRowListArrayBuilder::new(empty_arr)
1506                        .with_nullable(field.is_nullable())
1507                        .build_fixed_size_list_array(0),
1508                );
1509                Ok(ScalarValue::FixedSizeList(values))
1510            }
1511            DataType::LargeList(field) => {
1512                let list = ScalarValue::new_large_list(&[], field.data_type());
1513                Ok(ScalarValue::LargeList(list))
1514            }
1515
1516            // Struct types
1517            DataType::Struct(fields) => {
1518                let values = fields
1519                    .iter()
1520                    .map(|f| ScalarValue::new_default(f.data_type()))
1521                    .collect::<Result<Vec<_>>>()?;
1522                Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1523                    fields.clone(),
1524                    values
1525                        .into_iter()
1526                        .map(|v| v.to_array())
1527                        .collect::<Result<_>>()?,
1528                    None,
1529                ))))
1530            }
1531
1532            // Dictionary types
1533            DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1534                key_type.clone(),
1535                Box::new(ScalarValue::new_default(value_type)?),
1536            )),
1537
1538            // Map types
1539            DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1540                ArrayData::new_empty(field.data_type()),
1541            )))),
1542
1543            // Union types - return first variant with default value
1544            DataType::Union(fields, mode) => {
1545                if let Some((type_id, field)) = fields.iter().next() {
1546                    let default_value = ScalarValue::new_default(field.data_type())?;
1547                    Ok(ScalarValue::Union(
1548                        Some((type_id, Box::new(default_value))),
1549                        fields.clone(),
1550                        *mode,
1551                    ))
1552                } else {
1553                    _internal_err!("Union type must have at least one field")
1554                }
1555            }
1556
1557            // Unsupported types for now
1558            _ => {
1559                _not_impl_err!(
1560                    "Default value for data_type \"{datatype}\" is not implemented yet"
1561                )
1562            }
1563        }
1564    }
1565
1566    /// Create an one value in the given type.
1567    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1568        Ok(match datatype {
1569            DataType::Int8 => ScalarValue::Int8(Some(1)),
1570            DataType::Int16 => ScalarValue::Int16(Some(1)),
1571            DataType::Int32 => ScalarValue::Int32(Some(1)),
1572            DataType::Int64 => ScalarValue::Int64(Some(1)),
1573            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1574            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1575            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1576            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1577            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1578            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1579            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1580            DataType::Decimal32(precision, scale) => {
1581                validate_decimal_precision_and_scale::<Decimal32Type>(
1582                    *precision, *scale,
1583                )?;
1584                if *scale < 0 {
1585                    return _internal_err!("Negative scale is not supported");
1586                }
1587                match 10_i32.checked_pow(*scale as u32) {
1588                    Some(value) => {
1589                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1590                    }
1591                    None => return _internal_err!("Unsupported scale {scale}"),
1592                }
1593            }
1594            DataType::Decimal64(precision, scale) => {
1595                validate_decimal_precision_and_scale::<Decimal64Type>(
1596                    *precision, *scale,
1597                )?;
1598                if *scale < 0 {
1599                    return _internal_err!("Negative scale is not supported");
1600                }
1601                match i64::from(10).checked_pow(*scale as u32) {
1602                    Some(value) => {
1603                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1604                    }
1605                    None => return _internal_err!("Unsupported scale {scale}"),
1606                }
1607            }
1608            DataType::Decimal128(precision, scale) => {
1609                validate_decimal_precision_and_scale::<Decimal128Type>(
1610                    *precision, *scale,
1611                )?;
1612                if *scale < 0 {
1613                    return _internal_err!("Negative scale is not supported");
1614                }
1615                match i128::from(10).checked_pow(*scale as u32) {
1616                    Some(value) => {
1617                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1618                    }
1619                    None => return _internal_err!("Unsupported scale {scale}"),
1620                }
1621            }
1622            DataType::Decimal256(precision, scale) => {
1623                validate_decimal_precision_and_scale::<Decimal256Type>(
1624                    *precision, *scale,
1625                )?;
1626                if *scale < 0 {
1627                    return _internal_err!("Negative scale is not supported");
1628                }
1629                match i256::from(10).checked_pow(*scale as u32) {
1630                    Some(value) => {
1631                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1632                    }
1633                    None => return _internal_err!("Unsupported scale {scale}"),
1634                }
1635            }
1636            _ => {
1637                return _not_impl_err!(
1638                    "Can't create an one scalar from data_type \"{datatype}\""
1639                );
1640            }
1641        })
1642    }
1643
1644    /// Create a negative one value in the given type.
1645    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1646        Ok(match datatype {
1647            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1648            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1649            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1650            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1651            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1652            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1653            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1654            DataType::Decimal32(precision, scale) => {
1655                validate_decimal_precision_and_scale::<Decimal32Type>(
1656                    *precision, *scale,
1657                )?;
1658                if *scale < 0 {
1659                    return _internal_err!("Negative scale is not supported");
1660                }
1661                match 10_i32.checked_pow(*scale as u32) {
1662                    Some(value) => {
1663                        ScalarValue::Decimal32(Some(-value), *precision, *scale)
1664                    }
1665                    None => return _internal_err!("Unsupported scale {scale}"),
1666                }
1667            }
1668            DataType::Decimal64(precision, scale) => {
1669                validate_decimal_precision_and_scale::<Decimal64Type>(
1670                    *precision, *scale,
1671                )?;
1672                if *scale < 0 {
1673                    return _internal_err!("Negative scale is not supported");
1674                }
1675                match i64::from(10).checked_pow(*scale as u32) {
1676                    Some(value) => {
1677                        ScalarValue::Decimal64(Some(-value), *precision, *scale)
1678                    }
1679                    None => return _internal_err!("Unsupported scale {scale}"),
1680                }
1681            }
1682            DataType::Decimal128(precision, scale) => {
1683                validate_decimal_precision_and_scale::<Decimal128Type>(
1684                    *precision, *scale,
1685                )?;
1686                if *scale < 0 {
1687                    return _internal_err!("Negative scale is not supported");
1688                }
1689                match i128::from(10).checked_pow(*scale as u32) {
1690                    Some(value) => {
1691                        ScalarValue::Decimal128(Some(-value), *precision, *scale)
1692                    }
1693                    None => return _internal_err!("Unsupported scale {scale}"),
1694                }
1695            }
1696            DataType::Decimal256(precision, scale) => {
1697                validate_decimal_precision_and_scale::<Decimal256Type>(
1698                    *precision, *scale,
1699                )?;
1700                if *scale < 0 {
1701                    return _internal_err!("Negative scale is not supported");
1702                }
1703                match i256::from(10).checked_pow(*scale as u32) {
1704                    Some(value) => {
1705                        ScalarValue::Decimal256(Some(-value), *precision, *scale)
1706                    }
1707                    None => return _internal_err!("Unsupported scale {scale}"),
1708                }
1709            }
1710            _ => {
1711                return _not_impl_err!(
1712                    "Can't create a negative one scalar from data_type \"{datatype}\""
1713                );
1714            }
1715        })
1716    }
1717
1718    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1719        Ok(match datatype {
1720            DataType::Int8 => ScalarValue::Int8(Some(10)),
1721            DataType::Int16 => ScalarValue::Int16(Some(10)),
1722            DataType::Int32 => ScalarValue::Int32(Some(10)),
1723            DataType::Int64 => ScalarValue::Int64(Some(10)),
1724            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1725            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1726            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1727            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1728            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1729            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1730            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1731            DataType::Decimal32(precision, scale) => {
1732                if let Err(err) = validate_decimal_precision_and_scale::<Decimal32Type>(
1733                    *precision, *scale,
1734                ) {
1735                    return _internal_err!("Invalid precision and scale {err}");
1736                }
1737                if *scale <= 0 {
1738                    return _internal_err!("Negative scale is not supported");
1739                }
1740                match 10_i32.checked_pow((*scale + 1) as u32) {
1741                    Some(value) => {
1742                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1743                    }
1744                    None => return _internal_err!("Unsupported scale {scale}"),
1745                }
1746            }
1747            DataType::Decimal64(precision, scale) => {
1748                if let Err(err) = validate_decimal_precision_and_scale::<Decimal64Type>(
1749                    *precision, *scale,
1750                ) {
1751                    return _internal_err!("Invalid precision and scale {err}");
1752                }
1753                if *scale <= 0 {
1754                    return _internal_err!("Negative scale is not supported");
1755                }
1756                match i64::from(10).checked_pow((*scale + 1) as u32) {
1757                    Some(value) => {
1758                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1759                    }
1760                    None => return _internal_err!("Unsupported scale {scale}"),
1761                }
1762            }
1763            DataType::Decimal128(precision, scale) => {
1764                if let Err(err) = validate_decimal_precision_and_scale::<Decimal128Type>(
1765                    *precision, *scale,
1766                ) {
1767                    return _internal_err!("Invalid precision and scale {err}");
1768                }
1769                if *scale < 0 {
1770                    return _internal_err!("Negative scale is not supported");
1771                }
1772                match i128::from(10).checked_pow((*scale + 1) as u32) {
1773                    Some(value) => {
1774                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1775                    }
1776                    None => return _internal_err!("Unsupported scale {scale}"),
1777                }
1778            }
1779            DataType::Decimal256(precision, scale) => {
1780                if let Err(err) = validate_decimal_precision_and_scale::<Decimal256Type>(
1781                    *precision, *scale,
1782                ) {
1783                    return _internal_err!("Invalid precision and scale {err}");
1784                }
1785                if *scale < 0 {
1786                    return _internal_err!("Negative scale is not supported");
1787                }
1788                match i256::from(10).checked_pow((*scale + 1) as u32) {
1789                    Some(value) => {
1790                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1791                    }
1792                    None => return _internal_err!("Unsupported scale {scale}"),
1793                }
1794            }
1795            _ => {
1796                return _not_impl_err!(
1797                    "Can't create a ten scalar from data_type \"{datatype}\""
1798                );
1799            }
1800        })
1801    }
1802
1803    /// return the [`DataType`] of this `ScalarValue`
1804    pub fn data_type(&self) -> DataType {
1805        match self {
1806            ScalarValue::Boolean(_) => DataType::Boolean,
1807            ScalarValue::UInt8(_) => DataType::UInt8,
1808            ScalarValue::UInt16(_) => DataType::UInt16,
1809            ScalarValue::UInt32(_) => DataType::UInt32,
1810            ScalarValue::UInt64(_) => DataType::UInt64,
1811            ScalarValue::Int8(_) => DataType::Int8,
1812            ScalarValue::Int16(_) => DataType::Int16,
1813            ScalarValue::Int32(_) => DataType::Int32,
1814            ScalarValue::Int64(_) => DataType::Int64,
1815            ScalarValue::Decimal32(_, precision, scale) => {
1816                DataType::Decimal32(*precision, *scale)
1817            }
1818            ScalarValue::Decimal64(_, precision, scale) => {
1819                DataType::Decimal64(*precision, *scale)
1820            }
1821            ScalarValue::Decimal128(_, precision, scale) => {
1822                DataType::Decimal128(*precision, *scale)
1823            }
1824            ScalarValue::Decimal256(_, precision, scale) => {
1825                DataType::Decimal256(*precision, *scale)
1826            }
1827            ScalarValue::TimestampSecond(_, tz_opt) => {
1828                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1829            }
1830            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1831                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1832            }
1833            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1834                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1835            }
1836            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1837                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1838            }
1839            ScalarValue::Float16(_) => DataType::Float16,
1840            ScalarValue::Float32(_) => DataType::Float32,
1841            ScalarValue::Float64(_) => DataType::Float64,
1842            ScalarValue::Utf8(_) => DataType::Utf8,
1843            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1844            ScalarValue::Utf8View(_) => DataType::Utf8View,
1845            ScalarValue::Binary(_) => DataType::Binary,
1846            ScalarValue::BinaryView(_) => DataType::BinaryView,
1847            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1848            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1849            ScalarValue::List(arr) => arr.data_type().to_owned(),
1850            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1851            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1852            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1853            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1854            ScalarValue::Date32(_) => DataType::Date32,
1855            ScalarValue::Date64(_) => DataType::Date64,
1856            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1857            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1858            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1859            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1860            ScalarValue::IntervalYearMonth(_) => {
1861                DataType::Interval(IntervalUnit::YearMonth)
1862            }
1863            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1864            ScalarValue::IntervalMonthDayNano(_) => {
1865                DataType::Interval(IntervalUnit::MonthDayNano)
1866            }
1867            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1868            ScalarValue::DurationMillisecond(_) => {
1869                DataType::Duration(TimeUnit::Millisecond)
1870            }
1871            ScalarValue::DurationMicrosecond(_) => {
1872                DataType::Duration(TimeUnit::Microsecond)
1873            }
1874            ScalarValue::DurationNanosecond(_) => {
1875                DataType::Duration(TimeUnit::Nanosecond)
1876            }
1877            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1878            ScalarValue::Dictionary(k, v) => {
1879                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1880            }
1881            ScalarValue::Null => DataType::Null,
1882        }
1883    }
1884
1885    /// Calculate arithmetic negation for a scalar value
1886    pub fn arithmetic_negate(&self) -> Result<Self> {
1887        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1888            v: T,
1889            ctx: impl Fn() -> String,
1890        ) -> Result<T> {
1891            v.neg_checked()
1892                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1893        }
1894        match self {
1895            ScalarValue::Int8(None)
1896            | ScalarValue::Int16(None)
1897            | ScalarValue::Int32(None)
1898            | ScalarValue::Int64(None)
1899            | ScalarValue::Float16(None)
1900            | ScalarValue::Float32(None)
1901            | ScalarValue::Float64(None) => Ok(self.clone()),
1902            ScalarValue::Float16(Some(v)) => {
1903                Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1904            }
1905            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1906            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1907            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1908            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1909            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1910            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1911            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1912                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1913                    format!("In negation of IntervalYearMonth({v})")
1914                })?)),
1915            ),
1916            ScalarValue::IntervalDayTime(Some(v)) => {
1917                let (days, ms) = IntervalDayTimeType::to_parts(*v);
1918                let val = IntervalDayTimeType::make_value(
1919                    neg_checked_with_ctx(days, || {
1920                        format!("In negation of days {days} in IntervalDayTime")
1921                    })?,
1922                    neg_checked_with_ctx(ms, || {
1923                        format!("In negation of milliseconds {ms} in IntervalDayTime")
1924                    })?,
1925                );
1926                Ok(ScalarValue::IntervalDayTime(Some(val)))
1927            }
1928            ScalarValue::IntervalMonthDayNano(Some(v)) => {
1929                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1930                let val = IntervalMonthDayNanoType::make_value(
1931                    neg_checked_with_ctx(months, || {
1932                        format!("In negation of months {months} of IntervalMonthDayNano")
1933                    })?,
1934                    neg_checked_with_ctx(days, || {
1935                        format!("In negation of days {days} of IntervalMonthDayNano")
1936                    })?,
1937                    neg_checked_with_ctx(nanos, || {
1938                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1939                    })?,
1940                );
1941                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1942            }
1943            ScalarValue::Decimal32(Some(v), precision, scale) => {
1944                Ok(ScalarValue::Decimal32(
1945                    Some(neg_checked_with_ctx(*v, || {
1946                        format!("In negation of Decimal32({v}, {precision}, {scale})")
1947                    })?),
1948                    *precision,
1949                    *scale,
1950                ))
1951            }
1952            ScalarValue::Decimal64(Some(v), precision, scale) => {
1953                Ok(ScalarValue::Decimal64(
1954                    Some(neg_checked_with_ctx(*v, || {
1955                        format!("In negation of Decimal64({v}, {precision}, {scale})")
1956                    })?),
1957                    *precision,
1958                    *scale,
1959                ))
1960            }
1961            ScalarValue::Decimal128(Some(v), precision, scale) => {
1962                Ok(ScalarValue::Decimal128(
1963                    Some(neg_checked_with_ctx(*v, || {
1964                        format!("In negation of Decimal128({v}, {precision}, {scale})")
1965                    })?),
1966                    *precision,
1967                    *scale,
1968                ))
1969            }
1970            ScalarValue::Decimal256(Some(v), precision, scale) => {
1971                Ok(ScalarValue::Decimal256(
1972                    Some(neg_checked_with_ctx(*v, || {
1973                        format!("In negation of Decimal256({v}, {precision}, {scale})")
1974                    })?),
1975                    *precision,
1976                    *scale,
1977                ))
1978            }
1979            ScalarValue::TimestampSecond(Some(v), tz) => {
1980                Ok(ScalarValue::TimestampSecond(
1981                    Some(neg_checked_with_ctx(*v, || {
1982                        format!("In negation of TimestampSecond({v})")
1983                    })?),
1984                    tz.clone(),
1985                ))
1986            }
1987            ScalarValue::TimestampNanosecond(Some(v), tz) => {
1988                Ok(ScalarValue::TimestampNanosecond(
1989                    Some(neg_checked_with_ctx(*v, || {
1990                        format!("In negation of TimestampNanoSecond({v})")
1991                    })?),
1992                    tz.clone(),
1993                ))
1994            }
1995            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1996                Ok(ScalarValue::TimestampMicrosecond(
1997                    Some(neg_checked_with_ctx(*v, || {
1998                        format!("In negation of TimestampMicroSecond({v})")
1999                    })?),
2000                    tz.clone(),
2001                ))
2002            }
2003            ScalarValue::TimestampMillisecond(Some(v), tz) => {
2004                Ok(ScalarValue::TimestampMillisecond(
2005                    Some(neg_checked_with_ctx(*v, || {
2006                        format!("In negation of TimestampMilliSecond({v})")
2007                    })?),
2008                    tz.clone(),
2009                ))
2010            }
2011            value => _internal_err!(
2012                "Can not run arithmetic negative on scalar value {value:?}"
2013            ),
2014        }
2015    }
2016
2017    /// Wrapping addition of `ScalarValue`
2018    ///
2019    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2020    /// should operate on Arrays directly, using vectorized array kernels
2021    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2022        let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2023        Self::try_from_array(r.as_ref(), 0)
2024    }
2025    /// Checked addition of `ScalarValue`
2026    ///
2027    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2028    /// should operate on Arrays directly, using vectorized array kernels
2029    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2030        let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2031        Self::try_from_array(r.as_ref(), 0)
2032    }
2033
2034    /// Wrapping subtraction of `ScalarValue`
2035    ///
2036    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2037    /// should operate on Arrays directly, using vectorized array kernels
2038    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2039        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2040        Self::try_from_array(r.as_ref(), 0)
2041    }
2042
2043    /// Checked subtraction of `ScalarValue`
2044    ///
2045    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2046    /// should operate on Arrays directly, using vectorized array kernels
2047    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2048        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2049        Self::try_from_array(r.as_ref(), 0)
2050    }
2051
2052    /// Wrapping multiplication of `ScalarValue`
2053    ///
2054    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2055    /// should operate on Arrays directly, using vectorized array kernels.
2056    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2057        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2058        Self::try_from_array(r.as_ref(), 0)
2059    }
2060
2061    /// Checked multiplication of `ScalarValue`
2062    ///
2063    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2064    /// should operate on Arrays directly, using vectorized array kernels.
2065    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2066        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2067        Self::try_from_array(r.as_ref(), 0)
2068    }
2069
2070    /// Performs `lhs / rhs`
2071    ///
2072    /// Overflow or division by zero will result in an error, with exception to
2073    /// floating point numbers, which instead follow the IEEE 754 rules.
2074    ///
2075    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2076    /// should operate on Arrays directly, using vectorized array kernels.
2077    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2078        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2079        Self::try_from_array(r.as_ref(), 0)
2080    }
2081
2082    /// Performs `lhs % rhs`
2083    ///
2084    /// Overflow or division by zero will result in an error, with exception to
2085    /// floating point numbers, which instead follow the IEEE 754 rules.
2086    ///
2087    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2088    /// should operate on Arrays directly, using vectorized array kernels.
2089    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2090        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2091        Self::try_from_array(r.as_ref(), 0)
2092    }
2093
2094    pub fn is_unsigned(&self) -> bool {
2095        matches!(
2096            self,
2097            ScalarValue::UInt8(_)
2098                | ScalarValue::UInt16(_)
2099                | ScalarValue::UInt32(_)
2100                | ScalarValue::UInt64(_)
2101        )
2102    }
2103
2104    /// whether this value is null or not.
2105    pub fn is_null(&self) -> bool {
2106        match self {
2107            ScalarValue::Boolean(v) => v.is_none(),
2108            ScalarValue::Null => true,
2109            ScalarValue::Float16(v) => v.is_none(),
2110            ScalarValue::Float32(v) => v.is_none(),
2111            ScalarValue::Float64(v) => v.is_none(),
2112            ScalarValue::Decimal32(v, _, _) => v.is_none(),
2113            ScalarValue::Decimal64(v, _, _) => v.is_none(),
2114            ScalarValue::Decimal128(v, _, _) => v.is_none(),
2115            ScalarValue::Decimal256(v, _, _) => v.is_none(),
2116            ScalarValue::Int8(v) => v.is_none(),
2117            ScalarValue::Int16(v) => v.is_none(),
2118            ScalarValue::Int32(v) => v.is_none(),
2119            ScalarValue::Int64(v) => v.is_none(),
2120            ScalarValue::UInt8(v) => v.is_none(),
2121            ScalarValue::UInt16(v) => v.is_none(),
2122            ScalarValue::UInt32(v) => v.is_none(),
2123            ScalarValue::UInt64(v) => v.is_none(),
2124            ScalarValue::Utf8(v)
2125            | ScalarValue::Utf8View(v)
2126            | ScalarValue::LargeUtf8(v) => v.is_none(),
2127            ScalarValue::Binary(v)
2128            | ScalarValue::BinaryView(v)
2129            | ScalarValue::FixedSizeBinary(_, v)
2130            | ScalarValue::LargeBinary(v) => v.is_none(),
2131            // arr.len() should be 1 for a list scalar, but we don't seem to
2132            // enforce that anywhere, so we still check against array length.
2133            ScalarValue::List(arr) => arr.len() == arr.null_count(),
2134            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2135            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2136            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2137            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2138            ScalarValue::Date32(v) => v.is_none(),
2139            ScalarValue::Date64(v) => v.is_none(),
2140            ScalarValue::Time32Second(v) => v.is_none(),
2141            ScalarValue::Time32Millisecond(v) => v.is_none(),
2142            ScalarValue::Time64Microsecond(v) => v.is_none(),
2143            ScalarValue::Time64Nanosecond(v) => v.is_none(),
2144            ScalarValue::TimestampSecond(v, _) => v.is_none(),
2145            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2146            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2147            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2148            ScalarValue::IntervalYearMonth(v) => v.is_none(),
2149            ScalarValue::IntervalDayTime(v) => v.is_none(),
2150            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2151            ScalarValue::DurationSecond(v) => v.is_none(),
2152            ScalarValue::DurationMillisecond(v) => v.is_none(),
2153            ScalarValue::DurationMicrosecond(v) => v.is_none(),
2154            ScalarValue::DurationNanosecond(v) => v.is_none(),
2155            ScalarValue::Union(v, _, _) => match v {
2156                Some((_, s)) => s.is_null(),
2157                None => true,
2158            },
2159            ScalarValue::Dictionary(_, v) => v.is_null(),
2160        }
2161    }
2162
2163    /// Absolute distance between two numeric values (of the same type). This method will return
2164    /// None if either one of the arguments are null. It might also return None if the resulting
2165    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
2166    /// rounded to the nearest integer.
2167    ///
2168    ///
2169    /// Note: the datatype itself must support subtraction.
2170    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2171        match (self, other) {
2172            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2173            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2174            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2175            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2176            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2177            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2178            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2179            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2180            // TODO: we might want to look into supporting ceil/floor here for floats.
2181            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2182                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2183            }
2184            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2185                Some((l - r).abs().round() as _)
2186            }
2187            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2188                Some((l - r).abs().round() as _)
2189            }
2190            (
2191                Self::Decimal128(Some(l), lprecision, lscale),
2192                Self::Decimal128(Some(r), rprecision, rscale),
2193            ) => {
2194                if lprecision == rprecision && lscale == rscale {
2195                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2196                } else {
2197                    None
2198                }
2199            }
2200            (
2201                Self::Decimal256(Some(l), lprecision, lscale),
2202                Self::Decimal256(Some(r), rprecision, rscale),
2203            ) => {
2204                if lprecision == rprecision && lscale == rscale {
2205                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2206                } else {
2207                    None
2208                }
2209            }
2210            _ => None,
2211        }
2212    }
2213
2214    /// Converts a scalar value into an 1-row array.
2215    ///
2216    /// # Errors
2217    ///
2218    /// Errors if the ScalarValue cannot be converted into a 1-row array
2219    pub fn to_array(&self) -> Result<ArrayRef> {
2220        self.to_array_of_size(1)
2221    }
2222
2223    /// Converts a scalar into an arrow [`Scalar`] (which implements
2224    /// the [`Datum`] interface).
2225    ///
2226    /// This can be used to call arrow compute kernels such as `lt`
2227    ///
2228    /// # Errors
2229    ///
2230    /// Errors if the ScalarValue cannot be converted into a 1-row array
2231    ///
2232    /// # Example
2233    /// ```
2234    /// use arrow::array::{BooleanArray, Int32Array};
2235    /// use datafusion_common::ScalarValue;
2236    ///
2237    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
2238    /// let five = ScalarValue::Int32(Some(5));
2239    ///
2240    /// let result =
2241    ///     arrow::compute::kernels::cmp::lt(&arr, &five.to_scalar().unwrap()).unwrap();
2242    ///
2243    /// let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
2244    ///
2245    /// assert_eq!(&result, &expected);
2246    /// ```
2247    /// [`Datum`]: arrow::array::Datum
2248    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2249        Ok(Scalar::new(self.to_array_of_size(1)?))
2250    }
2251
    /// Converts an iterator of owned [`ScalarValue`]s into an [`ArrayRef`]
    /// corresponding to those values. For example, an iterator of
    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
    ///
    /// The output type is taken from the first element of the iterator; every
    /// subsequent element is expected to be of the same variant.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator is empty, if the [`ScalarValue`]s are
    /// not all the same type, or if the data type of the first element is not
    /// supported (see the final match arm).
    ///
    /// # Panics
    ///
    /// For dictionary scalars, panics if the array built from the unwrapped
    /// values does not have the dictionary's value type, or if the dictionary
    /// key type is not one of the integer types handled below.
    ///
    /// # Example
    /// ```
    /// use arrow::array::{ArrayRef, BooleanArray};
    /// use datafusion_common::ScalarValue;
    ///
    /// let scalars = vec![
    ///     ScalarValue::Boolean(Some(true)),
    ///     ScalarValue::Boolean(None),
    ///     ScalarValue::Boolean(Some(false)),
    /// ];
    ///
    /// // Build an Array from the list of ScalarValues
    /// let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
    ///
    /// let expected: ArrayRef =
    ///     std::sync::Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)]));
    ///
    /// assert_eq!(&array, &expected);
    /// ```
    pub fn iter_to_array(
        scalars: impl IntoIterator<Item = ScalarValue>,
    ) -> Result<ArrayRef> {
        // Peekable so the first element can be inspected without consuming it.
        let mut scalars = scalars.into_iter().peekable();

        // figure out the type based on the first element
        let data_type = match scalars.peek() {
            None => {
                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
            }
            Some(sv) => sv.data_type(),
        };

        /// Creates an array of $ARRAY_TY by unpacking values of
        /// SCALAR_TY for primitive types
        macro_rules! build_array_primitive {
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                {
                    // Any element of a different variant aborts the collection
                    // with an execution error.
                    let array = scalars.map(|sv| {
                        if let ScalarValue::$SCALAR_TY(v) = sv {
                            Ok(v)
                        } else {
                            _exec_err!(
                                "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                data_type, sv
                            )
                        }
                    })
                    .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array)
                }
            }};
        }

        // Same as `build_array_primitive`, for variants that carry a timezone.
        // The timezone of each individual scalar is ignored; the resulting
        // array gets the timezone passed as $TZ.
        macro_rules! build_array_primitive_tz {
            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
                {
                    let array = scalars.map(|sv| {
                        if let ScalarValue::$SCALAR_TY(v, _) = sv {
                            Ok(v)
                        } else {
                            _exec_err!(
                                "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                data_type, sv
                            )
                        }
                    })
                    .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array.with_timezone_opt($TZ.clone()))
                }
            }};
        }

        /// Creates an array of $ARRAY_TY by unpacking values of
        /// SCALAR_TY for "string-like" types.
        macro_rules! build_array_string {
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                {
                    let array = scalars.map(|sv| {
                        if let ScalarValue::$SCALAR_TY(v) = sv {
                            Ok(v)
                        } else {
                            _exec_err!(
                                "Inconsistent types in ScalarValue::iter_to_array. \
                                    Expected {:?}, got {:?}",
                                data_type, sv
                            )
                        }
                    })
                    .collect::<Result<$ARRAY_TY>>()?;
                    Arc::new(array)
                }
            }};
        }

        let array: ArrayRef = match &data_type {
            // Decimals: the precision and scale of the first element are
            // applied to the whole array by the `iter_to_decimal*_array` helpers.
            DataType::Decimal32(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal64(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal128(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Decimal256(precision, scale) => {
                let decimal_array =
                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
                Arc::new(decimal_array)
            }
            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
            DataType::Utf8 => build_array_string!(StringArray, Utf8),
            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
            DataType::Binary => build_array_string!(BinaryArray, Binary),
            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
            DataType::Time32(TimeUnit::Second) => {
                build_array_primitive!(Time32SecondArray, Time32Second)
            }
            DataType::Time32(TimeUnit::Millisecond) => {
                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
            }
            DataType::Time64(TimeUnit::Microsecond) => {
                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
            }
            DataType::Time64(TimeUnit::Nanosecond) => {
                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
            }
            DataType::Timestamp(TimeUnit::Second, tz) => {
                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
            }
            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
                build_array_primitive_tz!(
                    TimestampMillisecondArray,
                    TimestampMillisecond,
                    tz
                )
            }
            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
                build_array_primitive_tz!(
                    TimestampMicrosecondArray,
                    TimestampMicrosecond,
                    tz
                )
            }
            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
                build_array_primitive_tz!(
                    TimestampNanosecondArray,
                    TimestampNanosecond,
                    tz
                )
            }
            DataType::Duration(TimeUnit::Second) => {
                build_array_primitive!(DurationSecondArray, DurationSecond)
            }
            DataType::Duration(TimeUnit::Millisecond) => {
                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
            }
            DataType::Duration(TimeUnit::Microsecond) => {
                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
            }
            DataType::Duration(TimeUnit::Nanosecond) => {
                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
            }
            DataType::Interval(IntervalUnit::DayTime) => {
                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
            }
            DataType::Interval(IntervalUnit::YearMonth) => {
                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
            }
            DataType::Interval(IntervalUnit::MonthDayNano) => {
                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
            }
            DataType::FixedSizeList(_, _) => {
                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
                // The length of nulls here we got is 1, so we need to resize the length of nulls to
                // the length of non-nulls.
                let mut arrays =
                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
                // Use the first non-null element as the reference for the
                // field and list size of the output.
                let first_non_null_data_type = arrays
                    .iter()
                    .find(|sv| !sv.is_null(0))
                    .map(|sv| sv.data_type().to_owned());
                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
                    // Replace every null entry with a one-row null array of
                    // that reference type so that `concat` sees a single type.
                    for array in arrays.iter_mut() {
                        if array.is_null(0) {
                            *array = Arc::new(FixedSizeListArray::new_null(
                                Arc::clone(&f),
                                l,
                                1,
                            ));
                        }
                    }
                }
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
                arrow::compute::concat(arrays.as_slice())?
            }
            // Other nested types: convert each scalar to its own array and
            // concatenate the results.
            DataType::List(_)
            | DataType::LargeList(_)
            | DataType::Map(_, _)
            | DataType::Struct(_)
            | DataType::Union(_, _) => {
                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
                arrow::compute::concat(arrays.as_slice())?
            }
            DataType::Dictionary(key_type, value_type) => {
                // create the values array
                // Each element must be a Dictionary scalar whose key type
                // matches the key type of the first element.
                let value_scalars = scalars
                    .map(|scalar| match scalar {
                        ScalarValue::Dictionary(inner_key_type, scalar) => {
                            if &inner_key_type == key_type {
                                Ok(*scalar)
                            } else {
                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
                            }
                        }
                        _ => {
                            _exec_err!(
                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
                            )
                        }
                    })
                    .collect::<Result<Vec<_>>>()?;

                // Recurse on the unwrapped values; the resulting array must
                // have the dictionary's value type (panics otherwise).
                let values = Self::iter_to_array(value_scalars)?;
                assert_eq!(values.data_type(), value_type.as_ref());

                // Wrap the values in a dictionary array with the requested
                // key type. NOTE(review): how keys are assigned is decided by
                // `dict_from_values`, which is defined elsewhere.
                match key_type.as_ref() {
                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
                }
            }
            DataType::FixedSizeBinary(size) => {
                // Collect the optional byte values first, then build the array
                // using the size taken from the first element's data type.
                let array = scalars
                    .map(|sv| {
                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
                            Ok(v)
                        } else {
                            _exec_err!(
                                "Inconsistent types in ScalarValue::iter_to_array. \
                                Expected {data_type}, got {sv:?}"
                            )
                        }
                    })
                    .collect::<Result<Vec<_>>>()?;
                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
                    array.into_iter(),
                    *size,
                )?;
                Arc::new(array)
            }
            // explicitly enumerate unsupported types so newly added
            // types must be acknowledged, Time32 and Time64 types are
            // not supported if the TimeUnit is not valid (Time32 can
            // only be used with Second and Millisecond, Time64 only
            // with Microsecond and Nanosecond)
            DataType::Time32(TimeUnit::Microsecond)
            | DataType::Time32(TimeUnit::Nanosecond)
            | DataType::Time64(TimeUnit::Second)
            | DataType::Time64(TimeUnit::Millisecond)
            | DataType::RunEndEncoded(_, _)
            | DataType::ListView(_)
            | DataType::LargeListView(_) => {
                return _not_impl_err!(
                    "Unsupported creation of {:?} array from ScalarValue {:?}",
                    data_type,
                    scalars.peek()
                );
            }
        };
        Ok(array)
    }
2561
2562    fn iter_to_null_array(
2563        scalars: impl IntoIterator<Item = ScalarValue>,
2564    ) -> Result<ArrayRef> {
2565        let length = scalars.into_iter().try_fold(
2566            0usize,
2567            |r, element: ScalarValue| match element {
2568                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2569                s => {
2570                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2571                }
2572            },
2573        )?;
2574        Ok(new_null_array(&DataType::Null, length))
2575    }
2576
2577    fn iter_to_decimal32_array(
2578        scalars: impl IntoIterator<Item = ScalarValue>,
2579        precision: u8,
2580        scale: i8,
2581    ) -> Result<Decimal32Array> {
2582        let array = scalars
2583            .into_iter()
2584            .map(|element: ScalarValue| match element {
2585                ScalarValue::Decimal32(v1, _, _) => Ok(v1),
2586                s => {
2587                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2588                }
2589            })
2590            .collect::<Result<Decimal32Array>>()?
2591            .with_precision_and_scale(precision, scale)?;
2592        Ok(array)
2593    }
2594
2595    fn iter_to_decimal64_array(
2596        scalars: impl IntoIterator<Item = ScalarValue>,
2597        precision: u8,
2598        scale: i8,
2599    ) -> Result<Decimal64Array> {
2600        let array = scalars
2601            .into_iter()
2602            .map(|element: ScalarValue| match element {
2603                ScalarValue::Decimal64(v1, _, _) => Ok(v1),
2604                s => {
2605                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2606                }
2607            })
2608            .collect::<Result<Decimal64Array>>()?
2609            .with_precision_and_scale(precision, scale)?;
2610        Ok(array)
2611    }
2612
2613    fn iter_to_decimal128_array(
2614        scalars: impl IntoIterator<Item = ScalarValue>,
2615        precision: u8,
2616        scale: i8,
2617    ) -> Result<Decimal128Array> {
2618        let array = scalars
2619            .into_iter()
2620            .map(|element: ScalarValue| match element {
2621                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2622                s => {
2623                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2624                }
2625            })
2626            .collect::<Result<Decimal128Array>>()?
2627            .with_precision_and_scale(precision, scale)?;
2628        Ok(array)
2629    }
2630
2631    fn iter_to_decimal256_array(
2632        scalars: impl IntoIterator<Item = ScalarValue>,
2633        precision: u8,
2634        scale: i8,
2635    ) -> Result<Decimal256Array> {
2636        let array = scalars
2637            .into_iter()
2638            .map(|element: ScalarValue| match element {
2639                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2640                s => {
2641                    _internal_err!(
2642                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2643                    )
2644                }
2645            })
2646            .collect::<Result<Decimal256Array>>()?
2647            .with_precision_and_scale(precision, scale)?;
2648        Ok(array)
2649    }
2650
2651    fn build_decimal32_array(
2652        value: Option<i32>,
2653        precision: u8,
2654        scale: i8,
2655        size: usize,
2656    ) -> Result<Decimal32Array> {
2657        Ok(match value {
2658            Some(val) => Decimal32Array::from(vec![val; size])
2659                .with_precision_and_scale(precision, scale)?,
2660            None => {
2661                let mut builder = Decimal32Array::builder(size)
2662                    .with_precision_and_scale(precision, scale)?;
2663                builder.append_nulls(size);
2664                builder.finish()
2665            }
2666        })
2667    }
2668
2669    fn build_decimal64_array(
2670        value: Option<i64>,
2671        precision: u8,
2672        scale: i8,
2673        size: usize,
2674    ) -> Result<Decimal64Array> {
2675        Ok(match value {
2676            Some(val) => Decimal64Array::from(vec![val; size])
2677                .with_precision_and_scale(precision, scale)?,
2678            None => {
2679                let mut builder = Decimal64Array::builder(size)
2680                    .with_precision_and_scale(precision, scale)?;
2681                builder.append_nulls(size);
2682                builder.finish()
2683            }
2684        })
2685    }
2686
2687    fn build_decimal128_array(
2688        value: Option<i128>,
2689        precision: u8,
2690        scale: i8,
2691        size: usize,
2692    ) -> Result<Decimal128Array> {
2693        Ok(match value {
2694            Some(val) => Decimal128Array::from(vec![val; size])
2695                .with_precision_and_scale(precision, scale)?,
2696            None => {
2697                let mut builder = Decimal128Array::builder(size)
2698                    .with_precision_and_scale(precision, scale)?;
2699                builder.append_nulls(size);
2700                builder.finish()
2701            }
2702        })
2703    }
2704
2705    fn build_decimal256_array(
2706        value: Option<i256>,
2707        precision: u8,
2708        scale: i8,
2709        size: usize,
2710    ) -> Result<Decimal256Array> {
2711        Ok(repeat_n(value, size)
2712            .collect::<Decimal256Array>()
2713            .with_precision_and_scale(precision, scale)?)
2714    }
2715
2716    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2717    /// `data_type`, to a single element [`ListArray`].
2718    ///
2719    /// Example
2720    /// ```
2721    /// use arrow::array::{Int32Array, ListArray};
2722    /// use arrow::datatypes::{DataType, Int32Type};
2723    /// use datafusion_common::cast::as_list_array;
2724    /// use datafusion_common::ScalarValue;
2725    ///
2726    /// let scalars = vec![
2727    ///     ScalarValue::Int32(Some(1)),
2728    ///     ScalarValue::Int32(None),
2729    ///     ScalarValue::Int32(Some(2)),
2730    /// ];
2731    ///
2732    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2733    ///
2734    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2735    ///     Some(1),
2736    ///     None,
2737    ///     Some(2),
2738    /// ])]);
2739    ///
2740    /// assert_eq!(*result, expected);
2741    /// ```
2742    pub fn new_list(
2743        values: &[ScalarValue],
2744        data_type: &DataType,
2745        nullable: bool,
2746    ) -> Arc<ListArray> {
2747        let values = if values.is_empty() {
2748            new_empty_array(data_type)
2749        } else {
2750            Self::iter_to_array(values.iter().cloned()).unwrap()
2751        };
2752        Arc::new(
2753            SingleRowListArrayBuilder::new(values)
2754                .with_nullable(nullable)
2755                .build_list_array(),
2756        )
2757    }
2758
2759    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2760    pub fn new_list_nullable(
2761        values: &[ScalarValue],
2762        data_type: &DataType,
2763    ) -> Arc<ListArray> {
2764        Self::new_list(values, data_type, true)
2765    }
2766
2767    /// Create ListArray with Null with specific data type
2768    ///
2769    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2770    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2771        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2772        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2773            &data_type, null_len,
2774        ))))
2775    }
2776
2777    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2778    /// `data_type`, to a [`ListArray`].
2779    ///
2780    /// Example
2781    /// ```
2782    /// use arrow::array::{Int32Array, ListArray};
2783    /// use arrow::datatypes::{DataType, Int32Type};
2784    /// use datafusion_common::cast::as_list_array;
2785    /// use datafusion_common::ScalarValue;
2786    ///
2787    /// let scalars = vec![
2788    ///     ScalarValue::Int32(Some(1)),
2789    ///     ScalarValue::Int32(None),
2790    ///     ScalarValue::Int32(Some(2)),
2791    /// ];
2792    ///
2793    /// let result =
2794    ///     ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2795    ///
2796    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2797    ///     Some(1),
2798    ///     None,
2799    ///     Some(2),
2800    /// ])]);
2801    ///
2802    /// assert_eq!(*result, expected);
2803    /// ```
2804    pub fn new_list_from_iter(
2805        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2806        data_type: &DataType,
2807        nullable: bool,
2808    ) -> Arc<ListArray> {
2809        let values = if values.len() == 0 {
2810            new_empty_array(data_type)
2811        } else {
2812            Self::iter_to_array(values).unwrap()
2813        };
2814        Arc::new(
2815            SingleRowListArrayBuilder::new(values)
2816                .with_nullable(nullable)
2817                .build_list_array(),
2818        )
2819    }
2820
2821    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2822    /// `data_type`, to a [`LargeListArray`].
2823    ///
2824    /// Example
2825    /// ```
2826    /// use arrow::array::{Int32Array, LargeListArray};
2827    /// use arrow::datatypes::{DataType, Int32Type};
2828    /// use datafusion_common::cast::as_large_list_array;
2829    /// use datafusion_common::ScalarValue;
2830    ///
2831    /// let scalars = vec![
2832    ///     ScalarValue::Int32(Some(1)),
2833    ///     ScalarValue::Int32(None),
2834    ///     ScalarValue::Int32(Some(2)),
2835    /// ];
2836    ///
2837    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2838    ///
2839    /// let expected =
2840    ///     LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2841    ///         Some(1),
2842    ///         None,
2843    ///         Some(2),
2844    ///     ])]);
2845    ///
2846    /// assert_eq!(*result, expected);
2847    /// ```
2848    pub fn new_large_list(
2849        values: &[ScalarValue],
2850        data_type: &DataType,
2851    ) -> Arc<LargeListArray> {
2852        let values = if values.is_empty() {
2853            new_empty_array(data_type)
2854        } else {
2855            Self::iter_to_array(values.iter().cloned()).unwrap()
2856        };
2857        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2858    }
2859
2860    /// Converts a scalar value into an array of `size` rows.
2861    ///
2862    /// # Errors
2863    ///
2864    /// Errors if `self` is
2865    /// - a decimal that fails be converted to a decimal array of size
2866    /// - a `FixedsizeList` that fails to be concatenated into an array of size
2867    /// - a `List` that fails to be concatenated into an array of size
2868    /// - a `Dictionary` that fails be converted to a dictionary array of size
2869    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2870        Ok(match self {
2871            ScalarValue::Decimal32(e, precision, scale) => Arc::new(
2872                ScalarValue::build_decimal32_array(*e, *precision, *scale, size)?,
2873            ),
2874            ScalarValue::Decimal64(e, precision, scale) => Arc::new(
2875                ScalarValue::build_decimal64_array(*e, *precision, *scale, size)?,
2876            ),
2877            ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2878                ScalarValue::build_decimal128_array(*e, *precision, *scale, size)?,
2879            ),
2880            ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2881                ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2882            ),
2883            ScalarValue::Boolean(e) => match e {
2884                None => new_null_array(&DataType::Boolean, size),
2885                Some(true) => {
2886                    Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
2887                        as ArrayRef
2888                }
2889                Some(false) => {
2890                    Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
2891                        as ArrayRef
2892                }
2893            },
2894            ScalarValue::Float64(e) => {
2895                build_array_from_option!(Float64, Float64Array, e, size)
2896            }
2897            ScalarValue::Float32(e) => {
2898                build_array_from_option!(Float32, Float32Array, e, size)
2899            }
2900            ScalarValue::Float16(e) => {
2901                build_array_from_option!(Float16, Float16Array, e, size)
2902            }
2903            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2904            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2905            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2906            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2907            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2908            ScalarValue::UInt16(e) => {
2909                build_array_from_option!(UInt16, UInt16Array, e, size)
2910            }
2911            ScalarValue::UInt32(e) => {
2912                build_array_from_option!(UInt32, UInt32Array, e, size)
2913            }
2914            ScalarValue::UInt64(e) => {
2915                build_array_from_option!(UInt64, UInt64Array, e, size)
2916            }
2917            ScalarValue::TimestampSecond(e, tz_opt) => {
2918                build_timestamp_array_from_option!(
2919                    TimeUnit::Second,
2920                    tz_opt.clone(),
2921                    TimestampSecondArray,
2922                    e,
2923                    size
2924                )
2925            }
2926            ScalarValue::TimestampMillisecond(e, tz_opt) => {
2927                build_timestamp_array_from_option!(
2928                    TimeUnit::Millisecond,
2929                    tz_opt.clone(),
2930                    TimestampMillisecondArray,
2931                    e,
2932                    size
2933                )
2934            }
2935
2936            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2937                build_timestamp_array_from_option!(
2938                    TimeUnit::Microsecond,
2939                    tz_opt.clone(),
2940                    TimestampMicrosecondArray,
2941                    e,
2942                    size
2943                )
2944            }
2945            ScalarValue::TimestampNanosecond(e, tz_opt) => {
2946                build_timestamp_array_from_option!(
2947                    TimeUnit::Nanosecond,
2948                    tz_opt.clone(),
2949                    TimestampNanosecondArray,
2950                    e,
2951                    size
2952                )
2953            }
2954            ScalarValue::Utf8(e) => match e {
2955                Some(value) => {
2956                    Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2957                }
2958                None => new_null_array(&DataType::Utf8, size),
2959            },
2960            ScalarValue::Utf8View(e) => match e {
2961                Some(value) => {
2962                    Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2963                }
2964                None => new_null_array(&DataType::Utf8View, size),
2965            },
2966            ScalarValue::LargeUtf8(e) => match e {
2967                Some(value) => {
2968                    Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2969                }
2970                None => new_null_array(&DataType::LargeUtf8, size),
2971            },
2972            ScalarValue::Binary(e) => match e {
2973                Some(value) => Arc::new(
2974                    repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2975                ),
2976                None => new_null_array(&DataType::Binary, size),
2977            },
2978            ScalarValue::BinaryView(e) => match e {
2979                Some(value) => Arc::new(
2980                    repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2981                ),
2982                None => new_null_array(&DataType::BinaryView, size),
2983            },
2984            ScalarValue::FixedSizeBinary(s, e) => match e {
2985                Some(value) => Arc::new(
2986                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2987                        repeat_n(Some(value.as_slice()), size),
2988                        *s,
2989                    )
2990                    .unwrap(),
2991                ),
2992                None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
2993            },
2994            ScalarValue::LargeBinary(e) => match e {
2995                Some(value) => Arc::new(
2996                    repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2997                ),
2998                None => new_null_array(&DataType::LargeBinary, size),
2999            },
3000            ScalarValue::List(arr) => {
3001                if size == 1 {
3002                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3003                }
3004                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3005            }
3006            ScalarValue::LargeList(arr) => {
3007                if size == 1 {
3008                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3009                }
3010                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3011            }
3012            ScalarValue::FixedSizeList(arr) => {
3013                if size == 1 {
3014                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3015                }
3016                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3017            }
3018            ScalarValue::Struct(arr) => {
3019                if size == 1 {
3020                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3021                }
3022                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3023            }
3024            ScalarValue::Map(arr) => {
3025                if size == 1 {
3026                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3027                }
3028                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3029            }
3030            ScalarValue::Date32(e) => {
3031                build_array_from_option!(Date32, Date32Array, e, size)
3032            }
3033            ScalarValue::Date64(e) => {
3034                build_array_from_option!(Date64, Date64Array, e, size)
3035            }
3036            ScalarValue::Time32Second(e) => {
3037                build_array_from_option!(
3038                    Time32,
3039                    TimeUnit::Second,
3040                    Time32SecondArray,
3041                    e,
3042                    size
3043                )
3044            }
3045            ScalarValue::Time32Millisecond(e) => {
3046                build_array_from_option!(
3047                    Time32,
3048                    TimeUnit::Millisecond,
3049                    Time32MillisecondArray,
3050                    e,
3051                    size
3052                )
3053            }
3054            ScalarValue::Time64Microsecond(e) => {
3055                build_array_from_option!(
3056                    Time64,
3057                    TimeUnit::Microsecond,
3058                    Time64MicrosecondArray,
3059                    e,
3060                    size
3061                )
3062            }
3063            ScalarValue::Time64Nanosecond(e) => {
3064                build_array_from_option!(
3065                    Time64,
3066                    TimeUnit::Nanosecond,
3067                    Time64NanosecondArray,
3068                    e,
3069                    size
3070                )
3071            }
3072            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3073                Interval,
3074                IntervalUnit::DayTime,
3075                IntervalDayTimeArray,
3076                e,
3077                size
3078            ),
3079            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3080                Interval,
3081                IntervalUnit::YearMonth,
3082                IntervalYearMonthArray,
3083                e,
3084                size
3085            ),
3086            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3087                Interval,
3088                IntervalUnit::MonthDayNano,
3089                IntervalMonthDayNanoArray,
3090                e,
3091                size
3092            ),
3093            ScalarValue::DurationSecond(e) => build_array_from_option!(
3094                Duration,
3095                TimeUnit::Second,
3096                DurationSecondArray,
3097                e,
3098                size
3099            ),
3100            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3101                Duration,
3102                TimeUnit::Millisecond,
3103                DurationMillisecondArray,
3104                e,
3105                size
3106            ),
3107            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3108                Duration,
3109                TimeUnit::Microsecond,
3110                DurationMicrosecondArray,
3111                e,
3112                size
3113            ),
3114            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3115                Duration,
3116                TimeUnit::Nanosecond,
3117                DurationNanosecondArray,
3118                e,
3119                size
3120            ),
3121            ScalarValue::Union(value, fields, mode) => match value {
3122                Some((v_id, value)) => {
3123                    let mut new_fields = Vec::with_capacity(fields.len());
3124                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3125                    for (f_id, field) in fields.iter() {
3126                        let ar = if f_id == *v_id {
3127                            value.to_array_of_size(size)?
3128                        } else {
3129                            let dt = field.data_type();
3130                            match mode {
3131                                UnionMode::Sparse => new_null_array(dt, size),
3132                                // In a dense union, only the child with values needs to be
3133                                // allocated
3134                                UnionMode::Dense => new_null_array(dt, 0),
3135                            }
3136                        };
3137                        let field = (**field).clone();
3138                        child_arrays.push(ar);
3139                        new_fields.push(field.clone());
3140                    }
3141                    let type_ids = repeat_n(*v_id, size);
3142                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3143                    let value_offsets = match mode {
3144                        UnionMode::Sparse => None,
3145                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3146                    };
3147                    let ar = UnionArray::try_new(
3148                        fields.clone(),
3149                        type_ids,
3150                        value_offsets,
3151                        child_arrays,
3152                    )
3153                    .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3154                    Arc::new(ar)
3155                }
3156                None => {
3157                    let dt = self.data_type();
3158                    new_null_array(&dt, size)
3159                }
3160            },
3161            ScalarValue::Dictionary(key_type, v) => {
3162                // values array is one element long (the value)
3163                match key_type.as_ref() {
3164                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3165                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3166                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3167                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3168                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3169                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3170                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3171                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3172                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3173                }
3174            }
3175            ScalarValue::Null => get_or_create_cached_null_array(size),
3176        })
3177    }
3178
3179    fn get_decimal_value_from_array(
3180        array: &dyn Array,
3181        index: usize,
3182        precision: u8,
3183        scale: i8,
3184    ) -> Result<ScalarValue> {
3185        match array.data_type() {
3186            DataType::Decimal32(_, _) => {
3187                let array = as_decimal32_array(array)?;
3188                if array.is_null(index) {
3189                    Ok(ScalarValue::Decimal32(None, precision, scale))
3190                } else {
3191                    let value = array.value(index);
3192                    Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3193                }
3194            }
3195            DataType::Decimal64(_, _) => {
3196                let array = as_decimal64_array(array)?;
3197                if array.is_null(index) {
3198                    Ok(ScalarValue::Decimal64(None, precision, scale))
3199                } else {
3200                    let value = array.value(index);
3201                    Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3202                }
3203            }
3204            DataType::Decimal128(_, _) => {
3205                let array = as_decimal128_array(array)?;
3206                if array.is_null(index) {
3207                    Ok(ScalarValue::Decimal128(None, precision, scale))
3208                } else {
3209                    let value = array.value(index);
3210                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3211                }
3212            }
3213            DataType::Decimal256(_, _) => {
3214                let array = as_decimal256_array(array)?;
3215                if array.is_null(index) {
3216                    Ok(ScalarValue::Decimal256(None, precision, scale))
3217                } else {
3218                    let value = array.value(index);
3219                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3220                }
3221            }
3222            other => {
3223                unreachable!("Invalid type isn't decimal: {other:?}")
3224            }
3225        }
3226    }
3227
3228    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3229        let arrays = repeat_n(arr, size).collect::<Vec<_>>();
3230        let ret = match !arrays.is_empty() {
3231            true => arrow::compute::concat(arrays.as_slice())?,
3232            false => arr.slice(0, 0),
3233        };
3234        Ok(ret)
3235    }
3236
3237    /// Retrieve ScalarValue for each row in `array`
3238    ///
3239    /// Elements in `array` may be NULL, in which case the corresponding element in the returned vector is None.
3240    ///
3241    /// Example 1: Array (ScalarValue::Int32)
3242    /// ```
3243    /// use arrow::array::ListArray;
3244    /// use arrow::datatypes::{DataType, Int32Type};
3245    /// use datafusion_common::ScalarValue;
3246    ///
3247    /// // Equivalent to [[1,2,3], [4,5]]
3248    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3249    ///     Some(vec![Some(1), Some(2), Some(3)]),
3250    ///     Some(vec![Some(4), Some(5)]),
3251    /// ]);
3252    ///
3253    /// // Convert the array into Scalar Values for each row
3254    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3255    ///
3256    /// let expected = vec![
3257    ///     Some(vec![
3258    ///         ScalarValue::Int32(Some(1)),
3259    ///         ScalarValue::Int32(Some(2)),
3260    ///         ScalarValue::Int32(Some(3)),
3261    ///     ]),
3262    ///     Some(vec![
3263    ///         ScalarValue::Int32(Some(4)),
3264    ///         ScalarValue::Int32(Some(5)),
3265    ///     ]),
3266    /// ];
3267    ///
3268    /// assert_eq!(scalar_vec, expected);
3269    /// ```
3270    ///
3271    /// Example 2: Nested array (ScalarValue::List)
3272    /// ```
3273    /// use arrow::array::ListArray;
3274    /// use arrow::datatypes::{DataType, Int32Type};
3275    /// use datafusion_common::utils::SingleRowListArrayBuilder;
3276    /// use datafusion_common::ScalarValue;
3277    /// use std::sync::Arc;
3278    ///
3279    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3280    ///     Some(vec![Some(1), Some(2), Some(3)]),
3281    ///     Some(vec![Some(4), Some(5)]),
3282    /// ]);
3283    ///
3284    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
3285    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
3286    ///
3287    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
3288    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3289    ///
3290    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3291    ///     Some(1),
3292    ///     Some(2),
3293    ///     Some(3),
3294    /// ])]);
3295    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3296    ///     Some(4),
3297    ///     Some(5),
3298    /// ])]);
3299    ///
3300    /// let expected = vec![Some(vec![
3301    ///     ScalarValue::List(Arc::new(l1)),
3302    ///     ScalarValue::List(Arc::new(l2)),
3303    /// ])];
3304    ///
3305    /// assert_eq!(scalar_vec, expected);
3306    /// ```
3307    ///
3308    /// Example 3: Nullable array
3309    /// ```
3310    /// use arrow::array::ListArray;
3311    /// use arrow::datatypes::{DataType, Int32Type};
3312    /// use datafusion_common::ScalarValue;
3313    ///
3314    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3315    ///     Some(vec![Some(1), Some(2), Some(3)]),
3316    ///     None,
3317    ///     Some(vec![Some(4), Some(5)]),
3318    /// ]);
3319    ///
3320    /// // Convert the array into Scalar Values for each row
3321    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3322    ///
3323    /// let expected = vec![
3324    ///     Some(vec![
3325    ///         ScalarValue::Int32(Some(1)),
3326    ///         ScalarValue::Int32(Some(2)),
3327    ///         ScalarValue::Int32(Some(3)),
3328    ///     ]),
3329    ///     None,
3330    ///     Some(vec![
3331    ///         ScalarValue::Int32(Some(4)),
3332    ///         ScalarValue::Int32(Some(5)),
3333    ///     ]),
3334    /// ];
3335    ///
3336    /// assert_eq!(scalar_vec, expected);
3337    /// ```
3338    pub fn convert_array_to_scalar_vec(
3339        array: &dyn Array,
3340    ) -> Result<Vec<Option<Vec<Self>>>> {
3341        fn generic_collect<OffsetSize: OffsetSizeTrait>(
3342            array: &dyn Array,
3343        ) -> Result<Vec<Option<Vec<ScalarValue>>>> {
3344            array
3345                .as_list::<OffsetSize>()
3346                .iter()
3347                .map(|nested_array| {
3348                    nested_array
3349                        .map(|array| {
3350                            (0..array.len())
3351                                .map(|i| ScalarValue::try_from_array(&array, i))
3352                                .collect::<Result<Vec<_>>>()
3353                        })
3354                        .transpose()
3355                })
3356                .collect()
3357        }
3358
3359        match array.data_type() {
3360            DataType::List(_) => generic_collect::<i32>(array),
3361            DataType::LargeList(_) => generic_collect::<i64>(array),
3362            _ => _internal_err!(
3363                "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList type"
3364            ),
3365        }
3366    }
3367
3368    #[deprecated(
3369        since = "46.0.0",
3370        note = "This function is obsolete. Use `to_array` instead"
3371    )]
3372    pub fn raw_data(&self) -> Result<ArrayRef> {
3373        match self {
3374            ScalarValue::List(arr) => Ok(arr.to_owned()),
3375            _ => _internal_err!("ScalarValue is not a list"),
3376        }
3377    }
3378
3379    /// Converts a value in `array` at `index` into a ScalarValue
3380    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3381        // handle NULL value
3382        if !array.is_valid(index) {
3383            return array.data_type().try_into();
3384        }
3385
3386        Ok(match array.data_type() {
3387            DataType::Null => ScalarValue::Null,
3388            DataType::Decimal32(precision, scale) => {
3389                ScalarValue::get_decimal_value_from_array(
3390                    array, index, *precision, *scale,
3391                )?
3392            }
3393            DataType::Decimal64(precision, scale) => {
3394                ScalarValue::get_decimal_value_from_array(
3395                    array, index, *precision, *scale,
3396                )?
3397            }
3398            DataType::Decimal128(precision, scale) => {
3399                ScalarValue::get_decimal_value_from_array(
3400                    array, index, *precision, *scale,
3401                )?
3402            }
3403            DataType::Decimal256(precision, scale) => {
3404                ScalarValue::get_decimal_value_from_array(
3405                    array, index, *precision, *scale,
3406                )?
3407            }
3408            DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3409            DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3410            DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3411            DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3412            DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3413            DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3414            DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3415            DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3416            DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3417            DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3418            DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3419            DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3420            DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3421            DataType::LargeBinary => {
3422                typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3423            }
3424            DataType::BinaryView => {
3425                typed_cast!(array, index, as_binary_view_array, BinaryView)?
3426            }
3427            DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3428            DataType::LargeUtf8 => {
3429                typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3430            }
3431            DataType::Utf8View => {
3432                typed_cast!(array, index, as_string_view_array, Utf8View)?
3433            }
3434            DataType::List(field) => {
3435                let list_array = array.as_list::<i32>();
3436                let nested_array = list_array.value(index);
3437                // Produces a single element `ListArray` with the value at `index`.
3438                SingleRowListArrayBuilder::new(nested_array)
3439                    .with_field(field)
3440                    .build_list_scalar()
3441            }
3442            DataType::LargeList(field) => {
3443                let list_array = as_large_list_array(array)?;
3444                let nested_array = list_array.value(index);
3445                // Produces a single element `LargeListArray` with the value at `index`.
3446                SingleRowListArrayBuilder::new(nested_array)
3447                    .with_field(field)
3448                    .build_large_list_scalar()
3449            }
3450            // TODO: There is no test for FixedSizeList now, add it later
3451            DataType::FixedSizeList(field, _) => {
3452                let list_array = as_fixed_size_list_array(array)?;
3453                let nested_array = list_array.value(index);
3454                // Produces a single element `FixedSizeListArray` with the value at `index`.
3455                let list_size = nested_array.len();
3456                SingleRowListArrayBuilder::new(nested_array)
3457                    .with_field(field)
3458                    .build_fixed_size_list_scalar(list_size)
3459            }
3460            DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
3461            DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
3462            DataType::Time32(TimeUnit::Second) => {
3463                typed_cast!(array, index, as_time32_second_array, Time32Second)?
3464            }
3465            DataType::Time32(TimeUnit::Millisecond) => {
3466                typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
3467            }
3468            DataType::Time64(TimeUnit::Microsecond) => {
3469                typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
3470            }
3471            DataType::Time64(TimeUnit::Nanosecond) => {
3472                typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
3473            }
3474            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
3475                array,
3476                index,
3477                as_timestamp_second_array,
3478                TimestampSecond,
3479                tz_opt
3480            )?,
3481            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
3482                array,
3483                index,
3484                as_timestamp_millisecond_array,
3485                TimestampMillisecond,
3486                tz_opt
3487            )?,
3488            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
3489                array,
3490                index,
3491                as_timestamp_microsecond_array,
3492                TimestampMicrosecond,
3493                tz_opt
3494            )?,
3495            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
3496                array,
3497                index,
3498                as_timestamp_nanosecond_array,
3499                TimestampNanosecond,
3500                tz_opt
3501            )?,
3502            DataType::Dictionary(key_type, _) => {
3503                let (values_array, values_index) = match key_type.as_ref() {
3504                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3505                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3506                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3507                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3508                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3509                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3510                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3511                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3512                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3513                };
3514                // look up the index in the values dictionary
3515                let value = match values_index {
3516                    Some(values_index) => {
3517                        ScalarValue::try_from_array(values_array, values_index)
3518                    }
3519                    // else entry was null, so return null
3520                    None => values_array.data_type().try_into(),
3521                }?;
3522
3523                Self::Dictionary(key_type.clone(), Box::new(value))
3524            }
3525            DataType::Struct(_) => {
3526                let a = array.slice(index, 1);
3527                Self::Struct(Arc::new(a.as_struct().to_owned()))
3528            }
3529            DataType::FixedSizeBinary(_) => {
3530                let array = as_fixed_size_binary_array(array)?;
3531                let size = match array.data_type() {
3532                    DataType::FixedSizeBinary(size) => *size,
3533                    _ => unreachable!(),
3534                };
3535                ScalarValue::FixedSizeBinary(
3536                    size,
3537                    match array.is_null(index) {
3538                        true => None,
3539                        false => Some(array.value(index).into()),
3540                    },
3541                )
3542            }
3543            DataType::Interval(IntervalUnit::DayTime) => {
3544                typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
3545            }
3546            DataType::Interval(IntervalUnit::YearMonth) => {
3547                typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
3548            }
3549            DataType::Interval(IntervalUnit::MonthDayNano) => {
3550                typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
3551            }
3552
3553            DataType::Duration(TimeUnit::Second) => {
3554                typed_cast!(array, index, as_duration_second_array, DurationSecond)?
3555            }
3556            DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
3557                array,
3558                index,
3559                as_duration_millisecond_array,
3560                DurationMillisecond
3561            )?,
3562            DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
3563                array,
3564                index,
3565                as_duration_microsecond_array,
3566                DurationMicrosecond
3567            )?,
3568            DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
3569                array,
3570                index,
3571                as_duration_nanosecond_array,
3572                DurationNanosecond
3573            )?,
3574            DataType::Map(_, _) => {
3575                let a = array.slice(index, 1);
3576                Self::Map(Arc::new(a.as_map().to_owned()))
3577            }
3578            DataType::Union(fields, mode) => {
3579                let array = as_union_array(array)?;
3580                let ti = array.type_id(index);
3581                let index = array.value_offset(index);
3582                let value = ScalarValue::try_from_array(array.child(ti), index)?;
3583                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
3584            }
3585            other => {
3586                return _not_impl_err!(
3587                    "Can't create a scalar from array of type \"{other:?}\""
3588                );
3589            }
3590        })
3591    }
3592
3593    /// Try to parse `value` into a ScalarValue of type `target_type`
3594    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3595        ScalarValue::from(value).cast_to(target_type)
3596    }
3597
3598    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
3599    ///
3600    /// Returns `None` if this `ScalarValue` is not a logical string type or the
3601    /// `ScalarValue` represents the `NULL` value.
3602    ///
3603    /// Note you can use [`Option::flatten`] to check for non null logical
3604    /// strings.
3605    ///
3606    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
3607    /// [`ScalarValue::Dictionary`] with a logical string value and store
3608    /// strings and can be accessed as `&str` using this method.
3609    ///
3610    /// # Example: logical strings
3611    /// ```
3612    /// # use datafusion_common::ScalarValue;
3613    /// /// non strings return None
3614    /// let scalar = ScalarValue::from(42);
3615    /// assert_eq!(scalar.try_as_str(), None);
3616    /// // Non null logical string returns Some(Some(&str))
3617    /// let scalar = ScalarValue::from("hello");
3618    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
3619    /// // Null logical string returns Some(None)
3620    /// let scalar = ScalarValue::Utf8(None);
3621    /// assert_eq!(scalar.try_as_str(), Some(None));
3622    /// ```
3623    ///
3624    /// # Example: use [`Option::flatten`] to check for non-null logical strings
3625    /// ```
3626    /// # use datafusion_common::ScalarValue;
3627    /// // Non null logical string returns Some(Some(&str))
3628    /// let scalar = ScalarValue::from("hello");
3629    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
3630    /// ```
3631    pub fn try_as_str(&self) -> Option<Option<&str>> {
3632        let v = match self {
3633            ScalarValue::Utf8(v) => v,
3634            ScalarValue::LargeUtf8(v) => v,
3635            ScalarValue::Utf8View(v) => v,
3636            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3637            _ => return None,
3638        };
3639        Some(v.as_ref().map(|v| v.as_str()))
3640    }
3641
3642    /// Try to cast this value to a ScalarValue of type `data_type`
3643    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3644        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3645    }
3646
3647    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
3648    pub fn cast_to_with_options(
3649        &self,
3650        target_type: &DataType,
3651        cast_options: &CastOptions<'static>,
3652    ) -> Result<Self> {
3653        let scalar_array = self.to_array()?;
3654        let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3655        ScalarValue::try_from_array(&cast_arr, 0)
3656    }
3657
3658    fn eq_array_decimal32(
3659        array: &ArrayRef,
3660        index: usize,
3661        value: Option<&i32>,
3662        precision: u8,
3663        scale: i8,
3664    ) -> Result<bool> {
3665        let array = as_decimal32_array(array)?;
3666        if array.precision() != precision || array.scale() != scale {
3667            return Ok(false);
3668        }
3669        let is_null = array.is_null(index);
3670        if let Some(v) = value {
3671            Ok(!array.is_null(index) && array.value(index) == *v)
3672        } else {
3673            Ok(is_null)
3674        }
3675    }
3676
3677    fn eq_array_decimal64(
3678        array: &ArrayRef,
3679        index: usize,
3680        value: Option<&i64>,
3681        precision: u8,
3682        scale: i8,
3683    ) -> Result<bool> {
3684        let array = as_decimal64_array(array)?;
3685        if array.precision() != precision || array.scale() != scale {
3686            return Ok(false);
3687        }
3688        let is_null = array.is_null(index);
3689        if let Some(v) = value {
3690            Ok(!array.is_null(index) && array.value(index) == *v)
3691        } else {
3692            Ok(is_null)
3693        }
3694    }
3695
3696    fn eq_array_decimal(
3697        array: &ArrayRef,
3698        index: usize,
3699        value: Option<&i128>,
3700        precision: u8,
3701        scale: i8,
3702    ) -> Result<bool> {
3703        let array = as_decimal128_array(array)?;
3704        if array.precision() != precision || array.scale() != scale {
3705            return Ok(false);
3706        }
3707        let is_null = array.is_null(index);
3708        if let Some(v) = value {
3709            Ok(!array.is_null(index) && array.value(index) == *v)
3710        } else {
3711            Ok(is_null)
3712        }
3713    }
3714
3715    fn eq_array_decimal256(
3716        array: &ArrayRef,
3717        index: usize,
3718        value: Option<&i256>,
3719        precision: u8,
3720        scale: i8,
3721    ) -> Result<bool> {
3722        let array = as_decimal256_array(array)?;
3723        if array.precision() != precision || array.scale() != scale {
3724            return Ok(false);
3725        }
3726        let is_null = array.is_null(index);
3727        if let Some(v) = value {
3728            Ok(!array.is_null(index) && array.value(index) == *v)
3729        } else {
3730            Ok(is_null)
3731        }
3732    }
3733
    /// Compares a single row of array @ index for equality with self,
    /// in an optimized fashion.
    ///
    /// This method implements an optimized version of:
    ///
    /// ```text
    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
    ///     arr_scalar.eq(self)
    /// ```
    ///
    /// *Performance note*: the arrow compute kernels should be
    /// preferred over this function if at all possible as they can be
    /// vectorized and are generally much faster.
    ///
    /// This function has a few narrow use cases such as hash table key
    /// comparisons where comparing a single row at a time is necessary.
    ///
    /// # Errors
    ///
    /// Errors if it fails to downcast `array` to the array type that
    /// corresponds to `self`.
    ///
    /// Nested values (`List`, `LargeList`, `FixedSizeList`, `Struct` and
    /// `Map`) are not downcast here: they are compared against a one-row
    /// slice of `array`, and that comparison does not return an error.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a dictionary with invalid key type
    #[inline]
    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
        Ok(match self {
            // Decimals also compare precision and scale, see the helpers.
            ScalarValue::Decimal32(v, precision, scale) => {
                ScalarValue::eq_array_decimal32(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Decimal64(v, precision, scale) => {
                ScalarValue::eq_array_decimal64(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Decimal128(v, precision, scale) => {
                ScalarValue::eq_array_decimal(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Decimal256(v, precision, scale) => {
                ScalarValue::eq_array_decimal256(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Boolean(val) => {
                eq_array_primitive!(array, index, as_boolean_array, val)?
            }
            ScalarValue::Float16(val) => {
                eq_array_primitive!(array, index, as_float16_array, val)?
            }
            ScalarValue::Float32(val) => {
                eq_array_primitive!(array, index, as_float32_array, val)?
            }
            ScalarValue::Float64(val) => {
                eq_array_primitive!(array, index, as_float64_array, val)?
            }
            ScalarValue::Int8(val) => {
                eq_array_primitive!(array, index, as_int8_array, val)?
            }
            ScalarValue::Int16(val) => {
                eq_array_primitive!(array, index, as_int16_array, val)?
            }
            ScalarValue::Int32(val) => {
                eq_array_primitive!(array, index, as_int32_array, val)?
            }
            ScalarValue::Int64(val) => {
                eq_array_primitive!(array, index, as_int64_array, val)?
            }
            ScalarValue::UInt8(val) => {
                eq_array_primitive!(array, index, as_uint8_array, val)?
            }
            ScalarValue::UInt16(val) => {
                eq_array_primitive!(array, index, as_uint16_array, val)?
            }
            ScalarValue::UInt32(val) => {
                eq_array_primitive!(array, index, as_uint32_array, val)?
            }
            ScalarValue::UInt64(val) => {
                eq_array_primitive!(array, index, as_uint64_array, val)?
            }
            ScalarValue::Utf8(val) => {
                eq_array_primitive!(array, index, as_string_array, val)?
            }
            ScalarValue::Utf8View(val) => {
                eq_array_primitive!(array, index, as_string_view_array, val)?
            }
            ScalarValue::LargeUtf8(val) => {
                eq_array_primitive!(array, index, as_large_string_array, val)?
            }
            ScalarValue::Binary(val) => {
                eq_array_primitive!(array, index, as_binary_array, val)?
            }
            ScalarValue::BinaryView(val) => {
                eq_array_primitive!(array, index, as_binary_view_array, val)?
            }
            // The declared byte width of the scalar is not part of the
            // comparison; only the (optional) value is handed to the macro.
            ScalarValue::FixedSizeBinary(_, val) => {
                eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
            }
            ScalarValue::LargeBinary(val) => {
                eq_array_primitive!(array, index, as_large_binary_array, val)?
            }
            // Nested values are stored as arrays and are compared against a
            // one-row slice of `array` (see `eq_array_list`).
            ScalarValue::List(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::LargeList(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::FixedSizeList(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Struct(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Map(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Date32(val) => {
                eq_array_primitive!(array, index, as_date32_array, val)?
            }
            ScalarValue::Date64(val) => {
                eq_array_primitive!(array, index, as_date64_array, val)?
            }
            ScalarValue::Time32Second(val) => {
                eq_array_primitive!(array, index, as_time32_second_array, val)?
            }
            ScalarValue::Time32Millisecond(val) => {
                eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
            }
            ScalarValue::Time64Microsecond(val) => {
                eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
            }
            ScalarValue::Time64Nanosecond(val) => {
                eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
            }
            // The time zone stored in the scalar is not part of the
            // comparison; only the (optional) timestamp value is used.
            ScalarValue::TimestampSecond(val, _) => {
                eq_array_primitive!(array, index, as_timestamp_second_array, val)?
            }
            ScalarValue::TimestampMillisecond(val, _) => {
                eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
            }
            ScalarValue::TimestampMicrosecond(val, _) => {
                eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
            }
            ScalarValue::TimestampNanosecond(val, _) => {
                eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
            }
            ScalarValue::IntervalYearMonth(val) => {
                eq_array_primitive!(array, index, as_interval_ym_array, val)?
            }
            ScalarValue::IntervalDayTime(val) => {
                eq_array_primitive!(array, index, as_interval_dt_array, val)?
            }
            ScalarValue::IntervalMonthDayNano(val) => {
                eq_array_primitive!(array, index, as_interval_mdn_array, val)?
            }
            ScalarValue::DurationSecond(val) => {
                eq_array_primitive!(array, index, as_duration_second_array, val)?
            }
            ScalarValue::DurationMillisecond(val) => {
                eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
            }
            ScalarValue::DurationMicrosecond(val) => {
                eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
            }
            ScalarValue::DurationNanosecond(val) => {
                eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
            }
            ScalarValue::Union(value, _, _) => {
                let array = as_union_array(array)?;
                // Resolve which child holds this row and where in that child.
                let ti = array.type_id(index);
                let index = array.value_offset(index);
                if let Some((ti_v, value)) = value {
                    // Type ids must agree before the child values are compared.
                    ti_v == &ti && value.eq_array(array.child(ti), index)?
                } else {
                    // A scalar without a value matches a null slot in the child.
                    array.child(ti).is_null(index)
                }
            }
            ScalarValue::Dictionary(key_type, v) => {
                let (values_array, values_index) = match key_type.as_ref() {
                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
                };
                // was the value in the array non null?
                match values_index {
                    // compare the wrapped scalar against the dictionary's values array
                    Some(values_index) => v.eq_array(values_array, values_index)?,
                    // null key: equal only if the wrapped scalar is null as well
                    None => v.is_null(),
                }
            }
            ScalarValue::Null => array.is_null(index),
        })
    }
3953
3954    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3955        let right = arr2.slice(index, 1);
3956        arr1 == &right
3957    }
3958
3959    /// Compare `self` with `other` and return an `Ordering`.
3960    ///
3961    /// This is the same as [`PartialOrd`] except that it returns
3962    /// `Err` if the values cannot be compared, e.g., they have incompatible data types.
3963    pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
3964        self.partial_cmp(other).ok_or_else(|| {
3965            _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
3966        })
3967    }
3968
3969    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
3970    /// includes the allocated size (`capacity`) rather than the current length (`len`)
3971    pub fn size(&self) -> usize {
3972        size_of_val(self)
3973            + match self {
3974                ScalarValue::Null
3975                | ScalarValue::Boolean(_)
3976                | ScalarValue::Float16(_)
3977                | ScalarValue::Float32(_)
3978                | ScalarValue::Float64(_)
3979                | ScalarValue::Decimal32(_, _, _)
3980                | ScalarValue::Decimal64(_, _, _)
3981                | ScalarValue::Decimal128(_, _, _)
3982                | ScalarValue::Decimal256(_, _, _)
3983                | ScalarValue::Int8(_)
3984                | ScalarValue::Int16(_)
3985                | ScalarValue::Int32(_)
3986                | ScalarValue::Int64(_)
3987                | ScalarValue::UInt8(_)
3988                | ScalarValue::UInt16(_)
3989                | ScalarValue::UInt32(_)
3990                | ScalarValue::UInt64(_)
3991                | ScalarValue::Date32(_)
3992                | ScalarValue::Date64(_)
3993                | ScalarValue::Time32Second(_)
3994                | ScalarValue::Time32Millisecond(_)
3995                | ScalarValue::Time64Microsecond(_)
3996                | ScalarValue::Time64Nanosecond(_)
3997                | ScalarValue::IntervalYearMonth(_)
3998                | ScalarValue::IntervalDayTime(_)
3999                | ScalarValue::IntervalMonthDayNano(_)
4000                | ScalarValue::DurationSecond(_)
4001                | ScalarValue::DurationMillisecond(_)
4002                | ScalarValue::DurationMicrosecond(_)
4003                | ScalarValue::DurationNanosecond(_) => 0,
4004                ScalarValue::Utf8(s)
4005                | ScalarValue::LargeUtf8(s)
4006                | ScalarValue::Utf8View(s) => {
4007                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4008                }
4009                ScalarValue::TimestampSecond(_, s)
4010                | ScalarValue::TimestampMillisecond(_, s)
4011                | ScalarValue::TimestampMicrosecond(_, s)
4012                | ScalarValue::TimestampNanosecond(_, s) => {
4013                    s.as_ref().map(|s| s.len()).unwrap_or_default()
4014                }
4015                ScalarValue::Binary(b)
4016                | ScalarValue::FixedSizeBinary(_, b)
4017                | ScalarValue::LargeBinary(b)
4018                | ScalarValue::BinaryView(b) => {
4019                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4020                }
4021                ScalarValue::List(arr) => arr.get_array_memory_size(),
4022                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4023                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4024                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4025                ScalarValue::Map(arr) => arr.get_array_memory_size(),
4026                ScalarValue::Union(vals, fields, _mode) => {
4027                    vals.as_ref()
4028                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
4029                        .unwrap_or_default()
4030                        // `fields` is boxed, so it is NOT already included in `self`
4031                        + size_of_val(fields)
4032                        + (size_of::<Field>() * fields.len())
4033                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4034                }
4035                ScalarValue::Dictionary(dt, sv) => {
4036                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
4037                    dt.size() + sv.size()
4038                }
4039            }
4040    }
4041
4042    /// Estimates [size](Self::size) of [`Vec`] in bytes.
4043    ///
4044    /// Includes the size of the [`Vec`] container itself.
4045    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4046        size_of_val(vec)
4047            + (size_of::<ScalarValue>() * vec.capacity())
4048            + vec
4049                .iter()
4050                .map(|sv| sv.size() - size_of_val(sv))
4051                .sum::<usize>()
4052    }
4053
4054    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
4055    ///
4056    /// Includes the size of the [`VecDeque`] container itself.
4057    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4058        size_of_val(vec_deque)
4059            + (size_of::<ScalarValue>() * vec_deque.capacity())
4060            + vec_deque
4061                .iter()
4062                .map(|sv| sv.size() - size_of_val(sv))
4063                .sum::<usize>()
4064    }
4065
4066    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
4067    ///
4068    /// Includes the size of the [`HashSet`] container itself.
4069    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4070        size_of_val(set)
4071            + (size_of::<ScalarValue>() * set.capacity())
4072            + set
4073                .iter()
4074                .map(|sv| sv.size() - size_of_val(sv))
4075                .sum::<usize>()
4076    }
4077
4078    /// Compacts the allocation referenced by `self` to the minimum, copying the data if
4079    /// necessary.
4080    ///
4081    /// This can be relevant when `self` is a list or contains a list as a nested value, as
4082    /// a single list holds an Arc to its entire original array buffer.
4083    pub fn compact(&mut self) {
4084        match self {
4085            ScalarValue::Null
4086            | ScalarValue::Boolean(_)
4087            | ScalarValue::Float16(_)
4088            | ScalarValue::Float32(_)
4089            | ScalarValue::Float64(_)
4090            | ScalarValue::Decimal32(_, _, _)
4091            | ScalarValue::Decimal64(_, _, _)
4092            | ScalarValue::Decimal128(_, _, _)
4093            | ScalarValue::Decimal256(_, _, _)
4094            | ScalarValue::Int8(_)
4095            | ScalarValue::Int16(_)
4096            | ScalarValue::Int32(_)
4097            | ScalarValue::Int64(_)
4098            | ScalarValue::UInt8(_)
4099            | ScalarValue::UInt16(_)
4100            | ScalarValue::UInt32(_)
4101            | ScalarValue::UInt64(_)
4102            | ScalarValue::Date32(_)
4103            | ScalarValue::Date64(_)
4104            | ScalarValue::Time32Second(_)
4105            | ScalarValue::Time32Millisecond(_)
4106            | ScalarValue::Time64Microsecond(_)
4107            | ScalarValue::Time64Nanosecond(_)
4108            | ScalarValue::IntervalYearMonth(_)
4109            | ScalarValue::IntervalDayTime(_)
4110            | ScalarValue::IntervalMonthDayNano(_)
4111            | ScalarValue::DurationSecond(_)
4112            | ScalarValue::DurationMillisecond(_)
4113            | ScalarValue::DurationMicrosecond(_)
4114            | ScalarValue::DurationNanosecond(_)
4115            | ScalarValue::Utf8(_)
4116            | ScalarValue::LargeUtf8(_)
4117            | ScalarValue::Utf8View(_)
4118            | ScalarValue::TimestampSecond(_, _)
4119            | ScalarValue::TimestampMillisecond(_, _)
4120            | ScalarValue::TimestampMicrosecond(_, _)
4121            | ScalarValue::TimestampNanosecond(_, _)
4122            | ScalarValue::Binary(_)
4123            | ScalarValue::FixedSizeBinary(_, _)
4124            | ScalarValue::LargeBinary(_)
4125            | ScalarValue::BinaryView(_) => (),
4126            ScalarValue::FixedSizeList(arr) => {
4127                let array = copy_array_data(&arr.to_data());
4128                *Arc::make_mut(arr) = FixedSizeListArray::from(array);
4129            }
4130            ScalarValue::List(arr) => {
4131                let array = copy_array_data(&arr.to_data());
4132                *Arc::make_mut(arr) = ListArray::from(array);
4133            }
4134            ScalarValue::LargeList(arr) => {
4135                let array = copy_array_data(&arr.to_data());
4136                *Arc::make_mut(arr) = LargeListArray::from(array)
4137            }
4138            ScalarValue::Struct(arr) => {
4139                let array = copy_array_data(&arr.to_data());
4140                *Arc::make_mut(arr) = StructArray::from(array);
4141            }
4142            ScalarValue::Map(arr) => {
4143                let array = copy_array_data(&arr.to_data());
4144                *Arc::make_mut(arr) = MapArray::from(array);
4145            }
4146            ScalarValue::Union(val, _, _) => {
4147                if let Some((_, value)) = val.as_mut() {
4148                    value.compact();
4149                }
4150            }
4151            ScalarValue::Dictionary(_, value) => {
4152                value.compact();
4153            }
4154        }
4155    }
4156
4157    /// Compacts ([ScalarValue::compact]) the current [ScalarValue] and returns it.
4158    pub fn compacted(mut self) -> Self {
4159        self.compact();
4160        self
4161    }
4162
4163    /// Returns the minimum value for the given numeric `DataType`.
4164    ///
4165    /// This function returns the smallest representable value for numeric
4166    /// and temporal data types. For non-numeric types, it returns `None`.
4167    ///
4168    /// # Supported Types
4169    ///
4170    /// - **Integer types**: `i8::MIN`, `i16::MIN`, etc.
4171    /// - **Unsigned types**: Always 0 (`u8::MIN`, `u16::MIN`, etc.)
4172    /// - **Float types**: Negative infinity (IEEE 754)
4173    /// - **Decimal types**: Smallest value based on precision
4174    /// - **Temporal types**: Minimum timestamp/date values
4175    /// - **Time types**: 0 (midnight)
4176    /// - **Duration types**: `i64::MIN`
4177    pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4178        match datatype {
4179            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4180            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4181            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4182            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4183            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4184            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4185            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4186            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4187            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4188            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4189            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4190            DataType::Decimal128(precision, scale) => {
4191                // For decimal, min is -10^(precision-scale) + 10^(-scale)
4192                // But for simplicity, we use the minimum i128 value that fits the precision
4193                let max_digits = 10_i128.pow(*precision as u32) - 1;
4194                Some(ScalarValue::Decimal128(
4195                    Some(-max_digits),
4196                    *precision,
4197                    *scale,
4198                ))
4199            }
4200            DataType::Decimal256(precision, scale) => {
4201                // Similar to Decimal128 but with i256
4202                // For now, use a large negative value
4203                let max_digits = i256::from_i128(10_i128)
4204                    .checked_pow(*precision as u32)
4205                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
4206                    .unwrap_or(i256::MAX);
4207                Some(ScalarValue::Decimal256(
4208                    Some(max_digits.neg_wrapping()),
4209                    *precision,
4210                    *scale,
4211                ))
4212            }
4213            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4214            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4215            DataType::Time32(TimeUnit::Second) => {
4216                Some(ScalarValue::Time32Second(Some(0)))
4217            }
4218            DataType::Time32(TimeUnit::Millisecond) => {
4219                Some(ScalarValue::Time32Millisecond(Some(0)))
4220            }
4221            DataType::Time64(TimeUnit::Microsecond) => {
4222                Some(ScalarValue::Time64Microsecond(Some(0)))
4223            }
4224            DataType::Time64(TimeUnit::Nanosecond) => {
4225                Some(ScalarValue::Time64Nanosecond(Some(0)))
4226            }
4227            DataType::Timestamp(unit, tz) => match unit {
4228                TimeUnit::Second => {
4229                    Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4230                }
4231                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4232                    Some(i64::MIN),
4233                    tz.clone(),
4234                )),
4235                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4236                    Some(i64::MIN),
4237                    tz.clone(),
4238                )),
4239                TimeUnit::Nanosecond => {
4240                    Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4241                }
4242            },
4243            DataType::Duration(unit) => match unit {
4244                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4245                TimeUnit::Millisecond => {
4246                    Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4247                }
4248                TimeUnit::Microsecond => {
4249                    Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4250                }
4251                TimeUnit::Nanosecond => {
4252                    Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4253                }
4254            },
4255            _ => None,
4256        }
4257    }
4258
4259    /// Returns the maximum value for the given numeric `DataType`.
4260    ///
4261    /// This function returns the largest representable value for numeric
4262    /// and temporal data types. For non-numeric types, it returns `None`.
4263    ///
4264    /// # Supported Types
4265    ///
4266    /// - **Integer types**: `i8::MAX`, `i16::MAX`, etc.
4267    /// - **Unsigned types**: `u8::MAX`, `u16::MAX`, etc.
4268    /// - **Float types**: Positive infinity (IEEE 754)
4269    /// - **Decimal types**: Largest value based on precision
4270    /// - **Temporal types**: Maximum timestamp/date values
4271    /// - **Time types**: Maximum time in the day (1 day - 1 unit)
4272    /// - **Duration types**: `i64::MAX`
4273    pub fn max(datatype: &DataType) -> Option<ScalarValue> {
4274        match datatype {
4275            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
4276            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
4277            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
4278            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
4279            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
4280            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
4281            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
4282            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
4283            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
4284            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
4285            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
4286            DataType::Decimal128(precision, scale) => {
4287                // For decimal, max is 10^(precision-scale) - 10^(-scale)
4288                // But for simplicity, we use the maximum i128 value that fits the precision
4289                let max_digits = 10_i128.pow(*precision as u32) - 1;
4290                Some(ScalarValue::Decimal128(
4291                    Some(max_digits),
4292                    *precision,
4293                    *scale,
4294                ))
4295            }
4296            DataType::Decimal256(precision, scale) => {
4297                // Similar to Decimal128 but with i256
4298                let max_digits = i256::from_i128(10_i128)
4299                    .checked_pow(*precision as u32)
4300                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
4301                    .unwrap_or(i256::MAX);
4302                Some(ScalarValue::Decimal256(
4303                    Some(max_digits),
4304                    *precision,
4305                    *scale,
4306                ))
4307            }
4308            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
4309            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
4310            DataType::Time32(TimeUnit::Second) => {
4311                // 86399 seconds = 23:59:59
4312                Some(ScalarValue::Time32Second(Some(86_399)))
4313            }
4314            DataType::Time32(TimeUnit::Millisecond) => {
4315                // 86_399_999 milliseconds = 23:59:59.999
4316                Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
4317            }
4318            DataType::Time64(TimeUnit::Microsecond) => {
4319                // 86_399_999_999 microseconds = 23:59:59.999999
4320                Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
4321            }
4322            DataType::Time64(TimeUnit::Nanosecond) => {
4323                // 86_399_999_999_999 nanoseconds = 23:59:59.999999999
4324                Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
4325            }
4326            DataType::Timestamp(unit, tz) => match unit {
4327                TimeUnit::Second => {
4328                    Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
4329                }
4330                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4331                    Some(i64::MAX),
4332                    tz.clone(),
4333                )),
4334                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4335                    Some(i64::MAX),
4336                    tz.clone(),
4337                )),
4338                TimeUnit::Nanosecond => {
4339                    Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
4340                }
4341            },
4342            DataType::Duration(unit) => match unit {
4343                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
4344                TimeUnit::Millisecond => {
4345                    Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
4346                }
4347                TimeUnit::Microsecond => {
4348                    Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
4349                }
4350                TimeUnit::Nanosecond => {
4351                    Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
4352                }
4353            },
4354            _ => None,
4355        }
4356    }
4357}
4358
4359/// Compacts the data of an `ArrayData` into a new `ArrayData`.
4360///
4361/// This is useful when you want to minimize the memory footprint of an
4362/// `ArrayData`. For example, the value returned by [`Array::slice`] still
4363/// points at the same underlying data buffers as the original array, which may
4364/// hold many more values. Calling `copy_array_data` on the sliced array will
4365/// create a new, smaller, `ArrayData` that only contains the data for the
4366/// sliced array.
4367///
4368/// # Example
4369/// ```
4370/// # use arrow::array::{make_array, Array, Int32Array};
4371/// use datafusion_common::scalar::copy_array_data;
4372/// let array = Int32Array::from_iter_values(0..8192);
4373/// // Take only the first 2 elements
4374/// let sliced_array = array.slice(0, 2);
4375/// // The memory footprint of `sliced_array` is close to 8192 * 4 bytes
4376/// assert_eq!(32864, sliced_array.get_array_memory_size());
4377/// // however, we can copy the data to a new `ArrayData`
4378/// let new_array = make_array(copy_array_data(&sliced_array.into_data()));
4379/// // The memory footprint of `new_array` is now only 2 * 4 bytes
4380/// // and overhead:
4381/// assert_eq!(160, new_array.get_array_memory_size());
4382/// ```
4383///
4384/// See also [`ScalarValue::compact`] which applies to `ScalarValue` instances
4385/// as necessary.
4386pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
4387    let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
4388    copy.extend(0, 0, src_data.len());
4389    copy.freeze()
4390}
4391
4392macro_rules! impl_scalar {
4393    ($ty:ty, $scalar:tt) => {
4394        impl From<$ty> for ScalarValue {
4395            fn from(value: $ty) -> Self {
4396                ScalarValue::$scalar(Some(value))
4397            }
4398        }
4399
4400        impl From<Option<$ty>> for ScalarValue {
4401            fn from(value: Option<$ty>) -> Self {
4402                ScalarValue::$scalar(value)
4403            }
4404        }
4405    };
4406}
4407
4408impl_scalar!(f64, Float64);
4409impl_scalar!(f32, Float32);
4410impl_scalar!(i8, Int8);
4411impl_scalar!(i16, Int16);
4412impl_scalar!(i32, Int32);
4413impl_scalar!(i64, Int64);
4414impl_scalar!(bool, Boolean);
4415impl_scalar!(u8, UInt8);
4416impl_scalar!(u16, UInt16);
4417impl_scalar!(u32, UInt32);
4418impl_scalar!(u64, UInt64);
4419
4420impl From<&str> for ScalarValue {
4421    fn from(value: &str) -> Self {
4422        Some(value).into()
4423    }
4424}
4425
4426impl From<Option<&str>> for ScalarValue {
4427    fn from(value: Option<&str>) -> Self {
4428        let value = value.map(|s| s.to_string());
4429        value.into()
4430    }
4431}
4432
4433/// Wrapper to create ScalarValue::Struct for convenience
4434impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
4435    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
4436        value
4437            .into_iter()
4438            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
4439                builder.with_name_and_scalar(name, value)
4440            })
4441            .build()
4442            .unwrap()
4443    }
4444}
4445
4446impl FromStr for ScalarValue {
4447    type Err = Infallible;
4448
4449    fn from_str(s: &str) -> Result<Self, Self::Err> {
4450        Ok(s.into())
4451    }
4452}
4453
4454impl From<String> for ScalarValue {
4455    fn from(value: String) -> Self {
4456        Some(value).into()
4457    }
4458}
4459
4460impl From<Option<String>> for ScalarValue {
4461    fn from(value: Option<String>) -> Self {
4462        ScalarValue::Utf8(value)
4463    }
4464}
4465
4466macro_rules! impl_try_from {
4467    ($SCALAR:ident, $NATIVE:ident) => {
4468        impl TryFrom<ScalarValue> for $NATIVE {
4469            type Error = DataFusionError;
4470
4471            fn try_from(value: ScalarValue) -> Result<Self> {
4472                match value {
4473                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
4474                    _ => _internal_err!(
4475                        "Cannot convert {:?} to {}",
4476                        value,
4477                        std::any::type_name::<Self>()
4478                    ),
4479                }
4480            }
4481        }
4482    };
4483}
4484
4485impl_try_from!(Int8, i8);
4486impl_try_from!(Int16, i16);
4487
4488// special implementation for i32 because of Date32 and Time32
4489impl TryFrom<ScalarValue> for i32 {
4490    type Error = DataFusionError;
4491
4492    fn try_from(value: ScalarValue) -> Result<Self> {
4493        match value {
4494            ScalarValue::Int32(Some(inner_value))
4495            | ScalarValue::Date32(Some(inner_value))
4496            | ScalarValue::Time32Second(Some(inner_value))
4497            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
4498            _ => _internal_err!(
4499                "Cannot convert {:?} to {}",
4500                value,
4501                std::any::type_name::<Self>()
4502            ),
4503        }
4504    }
4505}
4506
4507// special implementation for i64 because of Date64, Time64 and Timestamp
4508impl TryFrom<ScalarValue> for i64 {
4509    type Error = DataFusionError;
4510
4511    fn try_from(value: ScalarValue) -> Result<Self> {
4512        match value {
4513            ScalarValue::Int64(Some(inner_value))
4514            | ScalarValue::Date64(Some(inner_value))
4515            | ScalarValue::Time64Microsecond(Some(inner_value))
4516            | ScalarValue::Time64Nanosecond(Some(inner_value))
4517            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
4518            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
4519            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
4520            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
4521            _ => _internal_err!(
4522                "Cannot convert {:?} to {}",
4523                value,
4524                std::any::type_name::<Self>()
4525            ),
4526        }
4527    }
4528}
4529
4530// special implementation for i128 because of Decimal128
4531impl TryFrom<ScalarValue> for i128 {
4532    type Error = DataFusionError;
4533
4534    fn try_from(value: ScalarValue) -> Result<Self> {
4535        match value {
4536            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
4537            _ => _internal_err!(
4538                "Cannot convert {:?} to {}",
4539                value,
4540                std::any::type_name::<Self>()
4541            ),
4542        }
4543    }
4544}
4545
4546// special implementation for i256 because of Decimal128
4547impl TryFrom<ScalarValue> for i256 {
4548    type Error = DataFusionError;
4549
4550    fn try_from(value: ScalarValue) -> Result<Self> {
4551        match value {
4552            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
4553            _ => _internal_err!(
4554                "Cannot convert {:?} to {}",
4555                value,
4556                std::any::type_name::<Self>()
4557            ),
4558        }
4559    }
4560}
4561
// The remaining native types each correspond to exactly one `ScalarValue`
// variant, so the macro-generated conversion is sufficient for them.
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
4569
4570impl TryFrom<DataType> for ScalarValue {
4571    type Error = DataFusionError;
4572
4573    /// Create a Null instance of ScalarValue for this datatype
4574    fn try_from(datatype: DataType) -> Result<Self> {
4575        (&datatype).try_into()
4576    }
4577}
4578
4579impl TryFrom<&DataType> for ScalarValue {
4580    type Error = DataFusionError;
4581
4582    /// Create a Null instance of ScalarValue for this datatype
4583    fn try_from(data_type: &DataType) -> Result<Self> {
4584        Self::try_new_null(data_type)
4585    }
4586}
4587
/// Writes an optional value to the formatter `$F`: the value's `Display`
/// output when present, or the literal `NULL` when absent. Evaluates to the
/// `fmt::Result` of the write.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(e) = $EXPR {
            write!($F, "{e}")
        } else {
            write!($F, "NULL")
        }
    }};
}
4596
4597// Implement Display trait for ScalarValue
4598//
4599// # Panics
4600//
4601// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
4602impl fmt::Display for ScalarValue {
4603    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
4604        match self {
4605            ScalarValue::Decimal32(v, p, s) => {
4606                write!(f, "{v:?},{p:?},{s:?}")?;
4607            }
4608            ScalarValue::Decimal64(v, p, s) => {
4609                write!(f, "{v:?},{p:?},{s:?}")?;
4610            }
4611            ScalarValue::Decimal128(v, p, s) => {
4612                write!(f, "{v:?},{p:?},{s:?}")?;
4613            }
4614            ScalarValue::Decimal256(v, p, s) => {
4615                write!(f, "{v:?},{p:?},{s:?}")?;
4616            }
4617            ScalarValue::Boolean(e) => format_option!(f, e)?,
4618            ScalarValue::Float16(e) => format_option!(f, e)?,
4619            ScalarValue::Float32(e) => format_option!(f, e)?,
4620            ScalarValue::Float64(e) => format_option!(f, e)?,
4621            ScalarValue::Int8(e) => format_option!(f, e)?,
4622            ScalarValue::Int16(e) => format_option!(f, e)?,
4623            ScalarValue::Int32(e) => format_option!(f, e)?,
4624            ScalarValue::Int64(e) => format_option!(f, e)?,
4625            ScalarValue::UInt8(e) => format_option!(f, e)?,
4626            ScalarValue::UInt16(e) => format_option!(f, e)?,
4627            ScalarValue::UInt32(e) => format_option!(f, e)?,
4628            ScalarValue::UInt64(e) => format_option!(f, e)?,
4629            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
4630            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
4631            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
4632            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
4633            ScalarValue::Utf8(e)
4634            | ScalarValue::LargeUtf8(e)
4635            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
4636            ScalarValue::Binary(e)
4637            | ScalarValue::FixedSizeBinary(_, e)
4638            | ScalarValue::LargeBinary(e)
4639            | ScalarValue::BinaryView(e) => match e {
4640                Some(bytes) => {
4641                    // print up to first 10 bytes, with trailing ... if needed
4642                    for b in bytes.iter().take(10) {
4643                        write!(f, "{b:02X}")?;
4644                    }
4645                    if bytes.len() > 10 {
4646                        write!(f, "...")?;
4647                    }
4648                }
4649                None => write!(f, "NULL")?,
4650            },
4651            ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4652            ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4653            ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4654            ScalarValue::Date32(e) => format_option!(
4655                f,
4656                e.map(|v| {
4657                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4658                    match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
4659                    {
4660                        Some(date) => date.to_string(),
4661                        None => "".to_string(),
4662                    }
4663                })
4664            )?,
4665            ScalarValue::Date64(e) => format_option!(
4666                f,
4667                e.map(|v| {
4668                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4669                    match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
4670                    {
4671                        Some(date) => date.to_string(),
4672                        None => "".to_string(),
4673                    }
4674                })
4675            )?,
4676            ScalarValue::Time32Second(e) => format_option!(f, e)?,
4677            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
4678            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
4679            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
4680            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
4681            ScalarValue::IntervalMonthDayNano(e) => {
4682                format_option!(f, e.map(|v| format!("{v:?}")))?
4683            }
4684            ScalarValue::IntervalDayTime(e) => {
4685                format_option!(f, e.map(|v| format!("{v:?}")))?;
4686            }
4687            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
4688            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
4689            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
4690            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
4691            ScalarValue::Struct(struct_arr) => {
4692                // ScalarValue Struct should always have a single element
4693                assert_eq!(struct_arr.len(), 1);
4694
4695                if struct_arr.null_count() == struct_arr.len() {
4696                    write!(f, "NULL")?;
4697                    return Ok(());
4698                }
4699
4700                let columns = struct_arr.columns();
4701                let fields = struct_arr.fields();
4702                let nulls = struct_arr.nulls();
4703
4704                write!(
4705                    f,
4706                    "{{{}}}",
4707                    columns
4708                        .iter()
4709                        .zip(fields.iter())
4710                        .map(|(column, field)| {
4711                            if nulls.is_some_and(|b| b.is_null(0)) {
4712                                format!("{}:NULL", field.name())
4713                            } else if let DataType::Struct(_) = field.data_type() {
4714                                let sv = ScalarValue::Struct(Arc::new(
4715                                    column.as_struct().to_owned(),
4716                                ));
4717                                format!("{}:{sv}", field.name())
4718                            } else {
4719                                let sv = array_value_to_string(column, 0).unwrap();
4720                                format!("{}:{sv}", field.name())
4721                            }
4722                        })
4723                        .collect::<Vec<_>>()
4724                        .join(",")
4725                )?
4726            }
4727            ScalarValue::Map(map_arr) => {
4728                if map_arr.null_count() == map_arr.len() {
4729                    write!(f, "NULL")?;
4730                    return Ok(());
4731                }
4732
4733                write!(
4734                    f,
4735                    "[{}]",
4736                    map_arr
4737                        .iter()
4738                        .map(|struct_array| {
4739                            if let Some(arr) = struct_array {
4740                                let mut buffer = VecDeque::new();
4741                                for i in 0..arr.len() {
4742                                    let key =
4743                                        array_value_to_string(arr.column(0), i).unwrap();
4744                                    let value =
4745                                        array_value_to_string(arr.column(1), i).unwrap();
4746                                    buffer.push_back(format!("{key}:{value}"));
4747                                }
4748                                format!(
4749                                    "{{{}}}",
4750                                    buffer
4751                                        .into_iter()
4752                                        .collect::<Vec<_>>()
4753                                        .join(",")
4754                                        .as_str()
4755                                )
4756                            } else {
4757                                "NULL".to_string()
4758                            }
4759                        })
4760                        .collect::<Vec<_>>()
4761                        .join(",")
4762                )?
4763            }
4764            ScalarValue::Union(val, _fields, _mode) => match val {
4765                Some((id, val)) => write!(f, "{id}:{val}")?,
4766                None => write!(f, "NULL")?,
4767            },
4768            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
4769            ScalarValue::Null => write!(f, "NULL")?,
4770        };
4771        Ok(())
4772    }
4773}
4774
4775fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
4776    // ScalarValue List, LargeList, FixedSizeList should always have a single element
4777    assert_eq!(arr.len(), 1);
4778    let options = FormatOptions::default().with_display_error(true);
4779    let formatter =
4780        ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
4781    let value_formatter = formatter.value(0);
4782    write!(f, "{value_formatter}")
4783}
4784
/// Writes a byte slice to the formatter as comma-separated decimal numbers
/// with no surrounding brackets: `[1, 2, 3]` ==> `"1,2,3"`. An empty slice
/// writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    if let Some((first, rest)) = data.split_first() {
        // The first byte has no leading separator; every later one does.
        write!(f, "{first}")?;
        for b in rest {
            write!(f, ",{b}")?;
        }
    }
    Ok(())
}
4796
// Debug output is the variant name wrapping the value. Most arms reuse the
// `Display` implementation (`{self}`) for the value part.
//
// # Panics
//
// Panics if a `Struct` scalar does not hold exactly one element, or if
// `array_value_to_string` fails for a struct or map value. Arms that embed
// `{self}` also inherit whatever panics the `Display` implementation has.
impl fmt::Debug for ScalarValue {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
            ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
            // Timestamps additionally show the optional timezone.
            ScalarValue::TimestampSecond(_, tz_opt) => {
                write!(f, "TimestampSecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
            }
            // Non-null strings are wrapped in double quotes; nulls are not.
            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
            // Non-null binary values are written in full as comma-separated
            // decimal bytes via `fmt_binary`, unlike `Display`, which prints a
            // truncated hex form.
            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
            ScalarValue::Binary(Some(b)) => {
                write!(f, "Binary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
            ScalarValue::BinaryView(Some(b)) => {
                write!(f, "BinaryView(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeBinary(size, None) => {
                write!(f, "FixedSizeBinary({size}, {self})")
            }
            ScalarValue::FixedSizeBinary(size, Some(b)) => {
                write!(f, "FixedSizeBinary({size}, \"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
            ScalarValue::LargeBinary(Some(b)) => {
                write!(f, "LargeBinary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
            ScalarValue::List(_) => write!(f, "List({self})"),
            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
            ScalarValue::Struct(struct_arr) => {
                // ScalarValue Struct should always have a single element
                assert_eq!(struct_arr.len(), 1);

                let columns = struct_arr.columns();
                let fields = struct_arr.fields();

                // Each field is rendered as `name:value` using arrow's string
                // conversion of row 0; there is no special case for a null
                // struct here, unlike in `Display`.
                write!(
                    f,
                    "Struct({{{}}})",
                    columns
                        .iter()
                        .zip(fields.iter())
                        .map(|(column, field)| {
                            let sv = array_value_to_string(column, 0).unwrap();
                            let name = field.name();
                            format!("{name}:{sv}")
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            ScalarValue::Map(map_arr) => {
                // Keys and values are converted to strings first and then
                // formatted with `{:?}`, so they appear quoted in the output.
                write!(
                    f,
                    "Map([{}])",
                    map_arr
                        .iter()
                        .map(|struct_array| {
                            if let Some(arr) = struct_array {
                                let buffer: Vec<String> = (0..arr.len())
                                    .map(|i| {
                                        let key = array_value_to_string(arr.column(0), i)
                                            .unwrap();
                                        let value =
                                            array_value_to_string(arr.column(1), i)
                                                .unwrap();
                                        format!("{key:?}:{value:?}")
                                    })
                                    .collect();
                                format!("{{{}}}", buffer.join(","))
                            } else {
                                "NULL".to_string()
                            }
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            // Date, time, interval and duration values are always quoted,
            // including when the value is null.
            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
            ScalarValue::Time32Millisecond(_) => {
                write!(f, "Time32Millisecond(\"{self}\")")
            }
            ScalarValue::Time64Microsecond(_) => {
                write!(f, "Time64Microsecond(\"{self}\")")
            }
            ScalarValue::Time64Nanosecond(_) => {
                write!(f, "Time64Nanosecond(\"{self}\")")
            }
            ScalarValue::IntervalDayTime(_) => {
                write!(f, "IntervalDayTime(\"{self}\")")
            }
            ScalarValue::IntervalYearMonth(_) => {
                write!(f, "IntervalYearMonth(\"{self}\")")
            }
            ScalarValue::IntervalMonthDayNano(_) => {
                write!(f, "IntervalMonthDayNano(\"{self}\")")
            }
            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
            ScalarValue::DurationMillisecond(_) => {
                write!(f, "DurationMillisecond(\"{self}\")")
            }
            ScalarValue::DurationMicrosecond(_) => {
                write!(f, "DurationMicrosecond(\"{self}\")")
            }
            ScalarValue::DurationNanosecond(_) => {
                write!(f, "DurationNanosecond(\"{self}\")")
            }
            // The inner union value is written with `Display`, not `Debug`.
            ScalarValue::Union(val, _fields, _mode) => match val {
                Some((id, val)) => write!(f, "Union {id}:{val}"),
                None => write!(f, "Union(NULL)"),
            },
            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
            ScalarValue::Null => write!(f, "NULL"),
        }
    }
}
4952
/// Trait used to map a NativeType to a ScalarValue
///
/// `T` is the native value type (bounded by `ArrowNativeType`); an
/// implementor chooses which [`ScalarValue`] variant wraps an optional `T`.
pub trait ScalarType<T: ArrowNativeType> {
    /// Returns the [`ScalarValue`] that wraps the optional native value `r`.
    ///
    /// How `None` is represented is left to the implementor.
    fn scalar(r: Option<T>) -> ScalarValue;
}
4958
4959impl ScalarType<f32> for Float32Type {
4960    fn scalar(r: Option<f32>) -> ScalarValue {
4961        ScalarValue::Float32(r)
4962    }
4963}
4964
4965impl ScalarType<i64> for TimestampSecondType {
4966    fn scalar(r: Option<i64>) -> ScalarValue {
4967        ScalarValue::TimestampSecond(r, None)
4968    }
4969}
4970
4971impl ScalarType<i64> for TimestampMillisecondType {
4972    fn scalar(r: Option<i64>) -> ScalarValue {
4973        ScalarValue::TimestampMillisecond(r, None)
4974    }
4975}
4976
4977impl ScalarType<i64> for TimestampMicrosecondType {
4978    fn scalar(r: Option<i64>) -> ScalarValue {
4979        ScalarValue::TimestampMicrosecond(r, None)
4980    }
4981}
4982
4983impl ScalarType<i64> for TimestampNanosecondType {
4984    fn scalar(r: Option<i64>) -> ScalarValue {
4985        ScalarValue::TimestampNanosecond(r, None)
4986    }
4987}
4988
4989impl ScalarType<i32> for Date32Type {
4990    fn scalar(r: Option<i32>) -> ScalarValue {
4991        ScalarValue::Date32(r)
4992    }
4993}
4994
4995#[cfg(test)]
4996mod tests {
4997    use std::sync::Arc;
4998
4999    use super::*;
5000    use crate::cast::{as_list_array, as_map_array, as_struct_array};
5001    use crate::test_util::batches_to_string;
5002    use arrow::array::{
5003        FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
5004        NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
5005        StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
5006    };
5007    use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5008    use arrow::compute::{is_null, kernels};
5009    use arrow::datatypes::{
5010        ArrowNumericType, Fields, Float64Type, DECIMAL256_MAX_PRECISION,
5011    };
5012    use arrow::error::ArrowError;
5013    use arrow::util::pretty::pretty_format_columns;
5014    use chrono::NaiveDate;
5015    use insta::assert_snapshot;
5016    use rand::Rng;
5017
5018    #[test]
5019    fn test_scalar_value_from_for_map() {
5020        let string_builder = StringBuilder::new();
5021        let int_builder = Int32Builder::with_capacity(4);
5022        let mut builder = MapBuilder::new(None, string_builder, int_builder);
5023        builder.keys().append_value("joe");
5024        builder.values().append_value(1);
5025        builder.append(true).unwrap();
5026
5027        builder.keys().append_value("blogs");
5028        builder.values().append_value(2);
5029        builder.keys().append_value("foo");
5030        builder.values().append_value(4);
5031        builder.append(true).unwrap();
5032        builder.append(true).unwrap();
5033        builder.append(false).unwrap();
5034
5035        let expected = builder.finish();
5036
5037        let sv = ScalarValue::Map(Arc::new(expected.clone()));
5038        let map_arr = sv.to_array().unwrap();
5039        let actual = as_map_array(&map_arr).unwrap();
5040        assert_eq!(actual, &expected);
5041    }
5042
5043    #[test]
5044    fn test_scalar_value_from_for_struct() {
5045        let boolean = Arc::new(BooleanArray::from(vec![false]));
5046        let int = Arc::new(Int32Array::from(vec![42]));
5047
5048        let expected = StructArray::from(vec![
5049            (
5050                Arc::new(Field::new("b", DataType::Boolean, false)),
5051                Arc::clone(&boolean) as ArrayRef,
5052            ),
5053            (
5054                Arc::new(Field::new("c", DataType::Int32, false)),
5055                Arc::clone(&int) as ArrayRef,
5056            ),
5057        ]);
5058
5059        let sv = ScalarStructBuilder::new()
5060            .with_array(Field::new("b", DataType::Boolean, false), boolean)
5061            .with_array(Field::new("c", DataType::Int32, false), int)
5062            .build()
5063            .unwrap();
5064
5065        let struct_arr = sv.to_array().unwrap();
5066        let actual = as_struct_array(&struct_arr).unwrap();
5067        assert_eq!(actual, &expected);
5068    }
5069
5070    #[test]
5071    #[should_panic(
5072        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
5073    )]
5074    fn test_scalar_value_from_for_struct_should_panic() {
5075        let _ = ScalarStructBuilder::new()
5076            .with_array(
5077                Field::new("bool", DataType::Boolean, false),
5078                Arc::new(BooleanArray::from(vec![false, true, false, false])),
5079            )
5080            .with_array(
5081                Field::new("i32", DataType::Int32, false),
5082                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
5083            )
5084            .build()
5085            .unwrap();
5086    }
5087
5088    #[test]
5089    fn test_to_array_of_size_for_nested() {
5090        // Struct
5091        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
5092        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
5093
5094        let struct_array = StructArray::from(vec![
5095            (
5096                Arc::new(Field::new("b", DataType::Boolean, false)),
5097                Arc::clone(&boolean) as ArrayRef,
5098            ),
5099            (
5100                Arc::new(Field::new("c", DataType::Int32, false)),
5101                Arc::clone(&int) as ArrayRef,
5102            ),
5103        ]);
5104        let sv = ScalarValue::Struct(Arc::new(struct_array));
5105        let actual_arr = sv.to_array_of_size(2).unwrap();
5106
5107        let boolean = Arc::new(BooleanArray::from(vec![
5108            false, false, true, true, false, false, true, true,
5109        ]));
5110        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
5111
5112        let struct_array = StructArray::from(vec![
5113            (
5114                Arc::new(Field::new("b", DataType::Boolean, false)),
5115                Arc::clone(&boolean) as ArrayRef,
5116            ),
5117            (
5118                Arc::new(Field::new("c", DataType::Int32, false)),
5119                Arc::clone(&int) as ArrayRef,
5120            ),
5121        ]);
5122
5123        let actual = as_struct_array(&actual_arr).unwrap();
5124        assert_eq!(actual, &struct_array);
5125
5126        // List
5127        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5128            Some(1),
5129            None,
5130            Some(2),
5131        ])]);
5132
5133        let sv = ScalarValue::List(Arc::new(arr));
5134        let actual_arr = sv
5135            .to_array_of_size(2)
5136            .expect("Failed to convert to array of size");
5137        let actual_list_arr = actual_arr.as_list::<i32>();
5138
5139        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5140            Some(vec![Some(1), None, Some(2)]),
5141            Some(vec![Some(1), None, Some(2)]),
5142        ]);
5143
5144        assert_eq!(&arr, actual_list_arr);
5145    }
5146
5147    #[test]
5148    fn test_to_array_of_size_for_fsl() {
5149        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
5150        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5151        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
5152        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
5153        let actual_arr = sv
5154            .to_array_of_size(2)
5155            .expect("Failed to convert to array of size");
5156
5157        let expected_values =
5158            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
5159        let expected_arr =
5160            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
5161
5162        assert_eq!(
5163            &expected_arr,
5164            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
5165        );
5166
5167        let empty_array = sv
5168            .to_array_of_size(0)
5169            .expect("Failed to convert to empty array");
5170
5171        assert_eq!(empty_array.len(), 0);
5172    }
5173
5174    #[test]
5175    fn test_list_to_array_string() {
5176        let scalars = vec![
5177            ScalarValue::from("rust"),
5178            ScalarValue::from("arrow"),
5179            ScalarValue::from("data-fusion"),
5180        ];
5181
5182        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
5183
5184        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
5185        assert_eq!(*result, expected);
5186    }
5187
5188    fn single_row_list_array(items: Vec<&str>) -> ListArray {
5189        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
5190            .build_list_array()
5191    }
5192
5193    fn build_list<O: OffsetSizeTrait>(
5194        values: Vec<Option<Vec<Option<i64>>>>,
5195    ) -> Vec<ScalarValue> {
5196        values
5197            .into_iter()
5198            .map(|v| {
5199                let arr = if v.is_some() {
5200                    Arc::new(
5201                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
5202                            vec![v],
5203                        ),
5204                    )
5205                } else if O::IS_LARGE {
5206                    new_null_array(
5207                        &DataType::LargeList(Arc::new(Field::new_list_field(
5208                            DataType::Int64,
5209                            true,
5210                        ))),
5211                        1,
5212                    )
5213                } else {
5214                    new_null_array(
5215                        &DataType::List(Arc::new(Field::new_list_field(
5216                            DataType::Int64,
5217                            true,
5218                        ))),
5219                        1,
5220                    )
5221                };
5222
5223                if O::IS_LARGE {
5224                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
5225                } else {
5226                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
5227                }
5228            })
5229            .collect()
5230    }
5231
5232    #[test]
5233    fn test_iter_to_array_fixed_size_list() {
5234        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5235        let f1 = Arc::new(FixedSizeListArray::new(
5236            Arc::clone(&field),
5237            3,
5238            Arc::new(Int32Array::from(vec![1, 2, 3])),
5239            None,
5240        ));
5241        let f2 = Arc::new(FixedSizeListArray::new(
5242            Arc::clone(&field),
5243            3,
5244            Arc::new(Int32Array::from(vec![4, 5, 6])),
5245            None,
5246        ));
5247        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
5248
5249        let scalars = vec![
5250            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
5251            ScalarValue::FixedSizeList(f1),
5252            ScalarValue::FixedSizeList(f2),
5253            ScalarValue::FixedSizeList(f_nulls),
5254        ];
5255
5256        let array = ScalarValue::iter_to_array(scalars).unwrap();
5257
5258        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
5259            vec![
5260                None,
5261                Some(vec![Some(1), Some(2), Some(3)]),
5262                Some(vec![Some(4), Some(5), Some(6)]),
5263                None,
5264            ],
5265            3,
5266        );
5267        assert_eq!(array.as_ref(), &expected);
5268    }
5269
5270    #[test]
5271    fn test_iter_to_array_struct() {
5272        let s1 = StructArray::from(vec![
5273            (
5274                Arc::new(Field::new("A", DataType::Boolean, false)),
5275                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5276            ),
5277            (
5278                Arc::new(Field::new("B", DataType::Int32, false)),
5279                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5280            ),
5281        ]);
5282
5283        let s2 = StructArray::from(vec![
5284            (
5285                Arc::new(Field::new("A", DataType::Boolean, false)),
5286                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5287            ),
5288            (
5289                Arc::new(Field::new("B", DataType::Int32, false)),
5290                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5291            ),
5292        ]);
5293
5294        let scalars = vec![
5295            ScalarValue::Struct(Arc::new(s1)),
5296            ScalarValue::Struct(Arc::new(s2)),
5297        ];
5298
5299        let array = ScalarValue::iter_to_array(scalars).unwrap();
5300
5301        let expected = StructArray::from(vec![
5302            (
5303                Arc::new(Field::new("A", DataType::Boolean, false)),
5304                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5305            ),
5306            (
5307                Arc::new(Field::new("B", DataType::Int32, false)),
5308                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
5309            ),
5310        ]);
5311        assert_eq!(array.as_ref(), &expected);
5312    }
5313
5314    #[test]
5315    fn test_iter_to_array_struct_with_nulls() {
5316        // non-null
5317        let s1 = StructArray::from((
5318            vec![
5319                (
5320                    Arc::new(Field::new("A", DataType::Int32, false)),
5321                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
5322                ),
5323                (
5324                    Arc::new(Field::new("B", DataType::Int64, false)),
5325                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
5326                ),
5327            ],
5328            // Present the null mask, 1 is non-null, 0 is null
5329            Buffer::from(&[1]),
5330        ));
5331
5332        // null
5333        let s2 = StructArray::from((
5334            vec![
5335                (
5336                    Arc::new(Field::new("A", DataType::Int32, false)),
5337                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
5338                ),
5339                (
5340                    Arc::new(Field::new("B", DataType::Int64, false)),
5341                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
5342                ),
5343            ],
5344            Buffer::from(&[0]),
5345        ));
5346
5347        let scalars = vec![
5348            ScalarValue::Struct(Arc::new(s1)),
5349            ScalarValue::Struct(Arc::new(s2)),
5350        ];
5351
5352        let array = ScalarValue::iter_to_array(scalars).unwrap();
5353        let struct_array = array.as_struct();
5354        assert!(struct_array.is_valid(0));
5355        assert!(struct_array.is_null(1));
5356    }
5357
5358    #[test]
5359    fn iter_to_array_primitive_test() {
5360        // List[[1,2,3]], List[null], List[[4,5]]
5361        let scalars = build_list::<i32>(vec![
5362            Some(vec![Some(1), Some(2), Some(3)]),
5363            None,
5364            Some(vec![Some(4), Some(5)]),
5365        ]);
5366
5367        let array = ScalarValue::iter_to_array(scalars).unwrap();
5368        let list_array = as_list_array(&array).unwrap();
5369        // List[[1,2,3], null, [4,5]]
5370        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5371            Some(vec![Some(1), Some(2), Some(3)]),
5372            None,
5373            Some(vec![Some(4), Some(5)]),
5374        ]);
5375        assert_eq!(list_array, &expected);
5376
5377        let scalars = build_list::<i64>(vec![
5378            Some(vec![Some(1), Some(2), Some(3)]),
5379            None,
5380            Some(vec![Some(4), Some(5)]),
5381        ]);
5382
5383        let array = ScalarValue::iter_to_array(scalars).unwrap();
5384        let list_array = as_large_list_array(&array).unwrap();
5385        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5386            Some(vec![Some(1), Some(2), Some(3)]),
5387            None,
5388            Some(vec![Some(4), Some(5)]),
5389        ]);
5390        assert_eq!(list_array, &expected);
5391    }
5392
5393    #[test]
5394    fn iter_to_array_string_test() {
5395        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
5396        let arr2 = single_row_list_array(vec!["rust", "world"]);
5397
5398        let scalars = vec![
5399            ScalarValue::List(Arc::new(arr1)),
5400            ScalarValue::List(Arc::new(arr2)),
5401        ];
5402
5403        let array = ScalarValue::iter_to_array(scalars).unwrap();
5404        let result = array.as_list::<i32>();
5405
5406        // build expected array
5407        let string_builder = StringBuilder::with_capacity(5, 25);
5408        let mut list_of_string_builder = ListBuilder::new(string_builder);
5409
5410        list_of_string_builder.values().append_value("foo");
5411        list_of_string_builder.values().append_value("bar");
5412        list_of_string_builder.values().append_value("baz");
5413        list_of_string_builder.append(true);
5414
5415        list_of_string_builder.values().append_value("rust");
5416        list_of_string_builder.values().append_value("world");
5417        list_of_string_builder.append(true);
5418        let expected = list_of_string_builder.finish();
5419
5420        assert_eq!(result, &expected);
5421    }
5422
5423    #[test]
5424    fn test_list_scalar_eq_to_array() {
5425        let list_array: ArrayRef =
5426            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5427                Some(vec![Some(0), Some(1), Some(2)]),
5428                None,
5429                Some(vec![None, Some(5)]),
5430            ]));
5431
5432        let fsl_array: ArrayRef =
5433            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5434                Some(vec![Some(0), Some(1), Some(2)]),
5435                None,
5436                Some(vec![Some(3), None, Some(5)]),
5437            ]));
5438
5439        for arr in [list_array, fsl_array] {
5440            for i in 0..arr.len() {
5441                let scalar =
5442                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
5443                assert!(scalar.eq_array(&arr, i).unwrap());
5444            }
5445        }
5446    }
5447
5448    #[test]
5449    fn test_eq_array_err_message() {
5450        assert_starts_with(
5451            ScalarValue::Utf8(Some("123".to_string()))
5452                .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0)
5453                .unwrap_err()
5454                .message(),
5455            "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
5456        );
5457    }
5458
5459    #[test]
5460    fn scalar_add_trait_test() -> Result<()> {
5461        let float_value = ScalarValue::Float64(Some(123.));
5462        let float_value_2 = ScalarValue::Float64(Some(123.));
5463        assert_eq!(
5464            (float_value.add(&float_value_2))?,
5465            ScalarValue::Float64(Some(246.))
5466        );
5467        assert_eq!(
5468            (float_value.add(float_value_2))?,
5469            ScalarValue::Float64(Some(246.))
5470        );
5471        Ok(())
5472    }
5473
5474    #[test]
5475    fn scalar_sub_trait_test() -> Result<()> {
5476        let float_value = ScalarValue::Float64(Some(123.));
5477        let float_value_2 = ScalarValue::Float64(Some(123.));
5478        assert_eq!(
5479            float_value.sub(&float_value_2)?,
5480            ScalarValue::Float64(Some(0.))
5481        );
5482        assert_eq!(
5483            float_value.sub(float_value_2)?,
5484            ScalarValue::Float64(Some(0.))
5485        );
5486        Ok(())
5487    }
5488
5489    #[test]
5490    fn scalar_sub_trait_int32_test() -> Result<()> {
5491        let int_value = ScalarValue::Int32(Some(42));
5492        let int_value_2 = ScalarValue::Int32(Some(100));
5493        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
5494        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
5495        Ok(())
5496    }
5497
5498    #[test]
5499    fn scalar_sub_trait_int32_overflow_test() {
5500        let int_value = ScalarValue::Int32(Some(i32::MAX));
5501        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
5502        let err = int_value
5503            .sub_checked(&int_value_2)
5504            .unwrap_err()
5505            .strip_backtrace();
5506        assert_eq!(
5507            err,
5508            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
5509        )
5510    }
5511
5512    #[test]
5513    fn scalar_sub_trait_int64_test() -> Result<()> {
5514        let int_value = ScalarValue::Int64(Some(42));
5515        let int_value_2 = ScalarValue::Int64(Some(100));
5516        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
5517        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
5518        Ok(())
5519    }
5520
5521    #[test]
5522    fn scalar_sub_trait_int64_overflow_test() {
5523        let int_value = ScalarValue::Int64(Some(i64::MAX));
5524        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
5525        let err = int_value
5526            .sub_checked(&int_value_2)
5527            .unwrap_err()
5528            .strip_backtrace();
5529        assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
5530    }
5531
5532    #[test]
5533    fn scalar_add_overflow_test() -> Result<()> {
5534        check_scalar_add_overflow::<Int8Type>(
5535            ScalarValue::Int8(Some(i8::MAX)),
5536            ScalarValue::Int8(Some(i8::MAX)),
5537        );
5538        check_scalar_add_overflow::<UInt8Type>(
5539            ScalarValue::UInt8(Some(u8::MAX)),
5540            ScalarValue::UInt8(Some(u8::MAX)),
5541        );
5542        check_scalar_add_overflow::<Int16Type>(
5543            ScalarValue::Int16(Some(i16::MAX)),
5544            ScalarValue::Int16(Some(i16::MAX)),
5545        );
5546        check_scalar_add_overflow::<UInt16Type>(
5547            ScalarValue::UInt16(Some(u16::MAX)),
5548            ScalarValue::UInt16(Some(u16::MAX)),
5549        );
5550        check_scalar_add_overflow::<Int32Type>(
5551            ScalarValue::Int32(Some(i32::MAX)),
5552            ScalarValue::Int32(Some(i32::MAX)),
5553        );
5554        check_scalar_add_overflow::<UInt32Type>(
5555            ScalarValue::UInt32(Some(u32::MAX)),
5556            ScalarValue::UInt32(Some(u32::MAX)),
5557        );
5558        check_scalar_add_overflow::<Int64Type>(
5559            ScalarValue::Int64(Some(i64::MAX)),
5560            ScalarValue::Int64(Some(i64::MAX)),
5561        );
5562        check_scalar_add_overflow::<UInt64Type>(
5563            ScalarValue::UInt64(Some(u64::MAX)),
5564            ScalarValue::UInt64(Some(u64::MAX)),
5565        );
5566
5567        Ok(())
5568    }
5569
5570    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
5571    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
5572    where
5573        T: ArrowNumericType,
5574    {
5575        let scalar_result = left.add_checked(&right);
5576
5577        let left_array = left.to_array().expect("Failed to convert to array");
5578        let right_array = right.to_array().expect("Failed to convert to array");
5579        let arrow_left_array = left_array.as_primitive::<T>();
5580        let arrow_right_array = right_array.as_primitive::<T>();
5581        let arrow_result = add(arrow_left_array, arrow_right_array);
5582
5583        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
5584    }
5585
5586    #[test]
5587    fn test_interval_add_timestamp() -> Result<()> {
5588        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
5589            months: 1,
5590            days: 2,
5591            nanoseconds: 3,
5592        }));
5593        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5594        let result = interval.add(&timestamp)?;
5595        let expect = timestamp.add(&interval)?;
5596        assert_eq!(result, expect);
5597
5598        let interval = ScalarValue::IntervalYearMonth(Some(123));
5599        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5600        let result = interval.add(&timestamp)?;
5601        let expect = timestamp.add(&interval)?;
5602        assert_eq!(result, expect);
5603
5604        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
5605            days: 1,
5606            milliseconds: 23,
5607        }));
5608        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5609        let result = interval.add(&timestamp)?;
5610        let expect = timestamp.add(&interval)?;
5611        assert_eq!(result, expect);
5612        Ok(())
5613    }
5614
5615    #[test]
5616    fn test_try_cmp() {
5617        assert_eq!(
5618            ScalarValue::try_cmp(
5619                &ScalarValue::Int32(Some(1)),
5620                &ScalarValue::Int32(Some(2))
5621            )
5622            .unwrap(),
5623            Ordering::Less
5624        );
5625        assert_eq!(
5626            ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
5627                .unwrap(),
5628            Ordering::Less
5629        );
5630        assert_starts_with(
5631            ScalarValue::try_cmp(
5632                &ScalarValue::Int32(Some(1)),
5633                &ScalarValue::Int64(Some(2)),
5634            )
5635            .unwrap_err()
5636            .message(),
5637            "Uncomparable values: Int32(1), Int64(2)",
5638        );
5639    }
5640
5641    #[test]
5642    fn scalar_decimal_test() -> Result<()> {
5643        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
5644        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
5645        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
5646        assert_eq!(123_i128, try_into_value);
5647        assert!(!decimal_value.is_null());
5648        let neg_decimal_value = decimal_value.arithmetic_negate()?;
5649        match neg_decimal_value {
5650            ScalarValue::Decimal128(v, _, _) => {
5651                assert_eq!(-123, v.unwrap());
5652            }
5653            _ => {
5654                unreachable!();
5655            }
5656        }
5657
5658        // decimal scalar to array
5659        let array = decimal_value
5660            .to_array()
5661            .expect("Failed to convert to array");
5662        let array = as_decimal128_array(&array)?;
5663        assert_eq!(1, array.len());
5664        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
5665        assert_eq!(123i128, array.value(0));
5666
5667        // decimal scalar to array with size
5668        let array = decimal_value
5669            .to_array_of_size(10)
5670            .expect("Failed to convert to array of size");
5671        let array_decimal = as_decimal128_array(&array)?;
5672        assert_eq!(10, array.len());
5673        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
5674        assert_eq!(123i128, array_decimal.value(0));
5675        assert_eq!(123i128, array_decimal.value(9));
5676        // test eq array
5677        assert!(decimal_value
5678            .eq_array(&array, 1)
5679            .expect("Failed to compare arrays"));
5680        assert!(decimal_value
5681            .eq_array(&array, 5)
5682            .expect("Failed to compare arrays"));
5683        // test try from array
5684        assert_eq!(
5685            decimal_value,
5686            ScalarValue::try_from_array(&array, 5).unwrap()
5687        );
5688
5689        assert_eq!(
5690            decimal_value,
5691            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
5692        );
5693
5694        // test compare
5695        let left = ScalarValue::Decimal128(Some(123), 10, 2);
5696        let right = ScalarValue::Decimal128(Some(124), 10, 2);
5697        assert!(!left.eq(&right));
5698        let result = left < right;
5699        assert!(result);
5700        let result = left <= right;
5701        assert!(result);
5702        let right = ScalarValue::Decimal128(Some(124), 10, 3);
5703        // make sure that two decimals with diff datatype can't be compared.
5704        let result = left.partial_cmp(&right);
5705        assert_eq!(None, result);
5706
5707        let decimal_vec = vec![
5708            ScalarValue::Decimal128(Some(1), 10, 2),
5709            ScalarValue::Decimal128(Some(2), 10, 2),
5710            ScalarValue::Decimal128(Some(3), 10, 2),
5711        ];
5712        // convert the vec to decimal array and check the result
5713        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
5714        assert_eq!(3, array.len());
5715        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
5716
5717        let decimal_vec = vec![
5718            ScalarValue::Decimal128(Some(1), 10, 2),
5719            ScalarValue::Decimal128(Some(2), 10, 2),
5720            ScalarValue::Decimal128(Some(3), 10, 2),
5721            ScalarValue::Decimal128(None, 10, 2),
5722        ];
5723        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
5724        assert_eq!(4, array.len());
5725        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
5726
5727        assert!(ScalarValue::try_new_decimal128(1, 10, 2)
5728            .unwrap()
5729            .eq_array(&array, 0)
5730            .expect("Failed to compare arrays"));
5731        assert!(ScalarValue::try_new_decimal128(2, 10, 2)
5732            .unwrap()
5733            .eq_array(&array, 1)
5734            .expect("Failed to compare arrays"));
5735        assert!(ScalarValue::try_new_decimal128(3, 10, 2)
5736            .unwrap()
5737            .eq_array(&array, 2)
5738            .expect("Failed to compare arrays"));
5739        assert_eq!(
5740            ScalarValue::Decimal128(None, 10, 2),
5741            ScalarValue::try_from_array(&array, 3).unwrap()
5742        );
5743
5744        Ok(())
5745    }
5746
5747    #[test]
5748    fn test_new_one_decimal128() {
5749        assert_eq!(
5750            ScalarValue::new_one(&DataType::Decimal128(5, 0)).unwrap(),
5751            ScalarValue::Decimal128(Some(1), 5, 0)
5752        );
5753        assert_eq!(
5754            ScalarValue::new_one(&DataType::Decimal128(5, 1)).unwrap(),
5755            ScalarValue::Decimal128(Some(10), 5, 1)
5756        );
5757        assert_eq!(
5758            ScalarValue::new_one(&DataType::Decimal128(5, 2)).unwrap(),
5759            ScalarValue::Decimal128(Some(100), 5, 2)
5760        );
5761        // More precision
5762        assert_eq!(
5763            ScalarValue::new_one(&DataType::Decimal128(7, 2)).unwrap(),
5764            ScalarValue::Decimal128(Some(100), 7, 2)
5765        );
5766        // No negative scale
5767        assert!(ScalarValue::new_one(&DataType::Decimal128(5, -1)).is_err());
5768        // Invalid combination
5769        assert!(ScalarValue::new_one(&DataType::Decimal128(0, 2)).is_err());
5770        assert!(ScalarValue::new_one(&DataType::Decimal128(5, 7)).is_err());
5771    }
5772
5773    #[test]
5774    fn test_new_one_decimal256() {
5775        assert_eq!(
5776            ScalarValue::new_one(&DataType::Decimal256(5, 0)).unwrap(),
5777            ScalarValue::Decimal256(Some(1.into()), 5, 0)
5778        );
5779        assert_eq!(
5780            ScalarValue::new_one(&DataType::Decimal256(5, 1)).unwrap(),
5781            ScalarValue::Decimal256(Some(10.into()), 5, 1)
5782        );
5783        assert_eq!(
5784            ScalarValue::new_one(&DataType::Decimal256(5, 2)).unwrap(),
5785            ScalarValue::Decimal256(Some(100.into()), 5, 2)
5786        );
5787        // More precision
5788        assert_eq!(
5789            ScalarValue::new_one(&DataType::Decimal256(7, 2)).unwrap(),
5790            ScalarValue::Decimal256(Some(100.into()), 7, 2)
5791        );
5792        // No negative scale
5793        assert!(ScalarValue::new_one(&DataType::Decimal256(5, -1)).is_err());
5794        // Invalid combination
5795        assert!(ScalarValue::new_one(&DataType::Decimal256(0, 2)).is_err());
5796        assert!(ScalarValue::new_one(&DataType::Decimal256(5, 7)).is_err());
5797    }
5798
5799    #[test]
5800    fn test_new_ten_decimal128() {
5801        assert_eq!(
5802            ScalarValue::new_ten(&DataType::Decimal128(5, 1)).unwrap(),
5803            ScalarValue::Decimal128(Some(100), 5, 1)
5804        );
5805        assert_eq!(
5806            ScalarValue::new_ten(&DataType::Decimal128(5, 2)).unwrap(),
5807            ScalarValue::Decimal128(Some(1000), 5, 2)
5808        );
5809        // More precision
5810        assert_eq!(
5811            ScalarValue::new_ten(&DataType::Decimal128(7, 2)).unwrap(),
5812            ScalarValue::Decimal128(Some(1000), 7, 2)
5813        );
5814        // No negative scale
5815        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, -1)).is_err());
5816        // Invalid combination
5817        assert!(ScalarValue::new_ten(&DataType::Decimal128(0, 2)).is_err());
5818        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, 7)).is_err());
5819    }
5820
5821    #[test]
5822    fn test_new_ten_decimal256() {
5823        assert_eq!(
5824            ScalarValue::new_ten(&DataType::Decimal256(5, 1)).unwrap(),
5825            ScalarValue::Decimal256(Some(100.into()), 5, 1)
5826        );
5827        assert_eq!(
5828            ScalarValue::new_ten(&DataType::Decimal256(5, 2)).unwrap(),
5829            ScalarValue::Decimal256(Some(1000.into()), 5, 2)
5830        );
5831        // More precision
5832        assert_eq!(
5833            ScalarValue::new_ten(&DataType::Decimal256(7, 2)).unwrap(),
5834            ScalarValue::Decimal256(Some(1000.into()), 7, 2)
5835        );
5836        // No negative scale
5837        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, -1)).is_err());
5838        // Invalid combination
5839        assert!(ScalarValue::new_ten(&DataType::Decimal256(0, 2)).is_err());
5840        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, 7)).is_err());
5841    }
5842
5843    #[test]
5844    fn test_new_negative_one_decimal128() {
5845        assert_eq!(
5846            ScalarValue::new_negative_one(&DataType::Decimal128(5, 0)).unwrap(),
5847            ScalarValue::Decimal128(Some(-1), 5, 0)
5848        );
5849        assert_eq!(
5850            ScalarValue::new_negative_one(&DataType::Decimal128(5, 2)).unwrap(),
5851            ScalarValue::Decimal128(Some(-100), 5, 2)
5852        );
5853    }
5854
5855    #[test]
5856    fn test_list_partial_cmp() {
5857        let a =
5858            ScalarValue::List(Arc::new(
5859                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5860                    Some(1),
5861                    Some(2),
5862                    Some(3),
5863                ])]),
5864            ));
5865        let b =
5866            ScalarValue::List(Arc::new(
5867                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5868                    Some(1),
5869                    Some(2),
5870                    Some(3),
5871                ])]),
5872            ));
5873        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
5874
5875        let a =
5876            ScalarValue::List(Arc::new(
5877                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5878                    Some(10),
5879                    Some(2),
5880                    Some(3),
5881                ])]),
5882            ));
5883        let b =
5884            ScalarValue::List(Arc::new(
5885                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5886                    Some(1),
5887                    Some(2),
5888                    Some(30),
5889                ])]),
5890            ));
5891        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5892
5893        let a =
5894            ScalarValue::List(Arc::new(
5895                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5896                    Some(10),
5897                    Some(2),
5898                    Some(3),
5899                ])]),
5900            ));
5901        let b =
5902            ScalarValue::List(Arc::new(
5903                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5904                    Some(10),
5905                    Some(2),
5906                    Some(30),
5907                ])]),
5908            ));
5909        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
5910
5911        let a =
5912            ScalarValue::List(Arc::new(
5913                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5914                    Some(1),
5915                    Some(2),
5916                    Some(3),
5917                ])]),
5918            ));
5919        let b =
5920            ScalarValue::List(Arc::new(
5921                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5922                    Some(2),
5923                    Some(3),
5924                ])]),
5925            ));
5926        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
5927
5928        let a =
5929            ScalarValue::List(Arc::new(
5930                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5931                    Some(2),
5932                    Some(3),
5933                    Some(4),
5934                ])]),
5935            ));
5936        let b =
5937            ScalarValue::List(Arc::new(
5938                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5939                    Some(1),
5940                    Some(2),
5941                ])]),
5942            ));
5943        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5944
5945        let a =
5946            ScalarValue::List(Arc::new(
5947                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5948                    Some(1),
5949                    Some(2),
5950                    Some(3),
5951                ])]),
5952            ));
5953        let b =
5954            ScalarValue::List(Arc::new(
5955                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5956                    Some(1),
5957                    Some(2),
5958                ])]),
5959            ));
5960        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5961
5962        let a =
5963            ScalarValue::List(Arc::new(
5964                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5965                    None,
5966                    Some(2),
5967                    Some(3),
5968                ])]),
5969            ));
5970        let b =
5971            ScalarValue::List(Arc::new(
5972                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5973                    Some(1),
5974                    Some(2),
5975                    Some(3),
5976                ])]),
5977            ));
5978        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5979
5980        let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
5981            Int64Type,
5982            _,
5983            _,
5984        >(vec![Some(vec![
5985            None,
5986            Some(2),
5987            Some(3),
5988        ])])));
5989        let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
5990            Int64Type,
5991            _,
5992            _,
5993        >(vec![Some(vec![
5994            Some(1),
5995            Some(2),
5996            Some(3),
5997        ])])));
5998        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5999
6000        let a = ScalarValue::FixedSizeList(Arc::new(
6001            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
6002                vec![Some(vec![None, Some(2), Some(3)])],
6003                3,
6004            ),
6005        ));
6006        let b = ScalarValue::FixedSizeList(Arc::new(
6007            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
6008                vec![Some(vec![Some(1), Some(2), Some(3)])],
6009                3,
6010            ),
6011        ));
6012        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6013    }
6014
6015    #[test]
6016    fn scalar_value_to_array_u64() -> Result<()> {
6017        let value = ScalarValue::UInt64(Some(13u64));
6018        let array = value.to_array().expect("Failed to convert to array");
6019        let array = as_uint64_array(&array)?;
6020        assert_eq!(array.len(), 1);
6021        assert!(!array.is_null(0));
6022        assert_eq!(array.value(0), 13);
6023
6024        let value = ScalarValue::UInt64(None);
6025        let array = value.to_array().expect("Failed to convert to array");
6026        let array = as_uint64_array(&array)?;
6027        assert_eq!(array.len(), 1);
6028        assert!(array.is_null(0));
6029        Ok(())
6030    }
6031
6032    #[test]
6033    fn scalar_value_to_array_u32() -> Result<()> {
6034        let value = ScalarValue::UInt32(Some(13u32));
6035        let array = value.to_array().expect("Failed to convert to array");
6036        let array = as_uint32_array(&array)?;
6037        assert_eq!(array.len(), 1);
6038        assert!(!array.is_null(0));
6039        assert_eq!(array.value(0), 13);
6040
6041        let value = ScalarValue::UInt32(None);
6042        let array = value.to_array().expect("Failed to convert to array");
6043        let array = as_uint32_array(&array)?;
6044        assert_eq!(array.len(), 1);
6045        assert!(array.is_null(0));
6046        Ok(())
6047    }
6048
6049    #[test]
6050    fn scalar_list_null_to_array() {
6051        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
6052
6053        assert_eq!(list_array.len(), 1);
6054        assert_eq!(list_array.values().len(), 0);
6055    }
6056
6057    #[test]
6058    fn scalar_large_list_null_to_array() {
6059        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
6060
6061        assert_eq!(list_array.len(), 1);
6062        assert_eq!(list_array.values().len(), 0);
6063    }
6064
6065    #[test]
6066    fn scalar_list_to_array() -> Result<()> {
6067        let values = vec![
6068            ScalarValue::UInt64(Some(100)),
6069            ScalarValue::UInt64(None),
6070            ScalarValue::UInt64(Some(101)),
6071        ];
6072        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
6073        assert_eq!(list_array.len(), 1);
6074        assert_eq!(list_array.values().len(), 3);
6075
6076        let prim_array_ref = list_array.value(0);
6077        let prim_array = as_uint64_array(&prim_array_ref)?;
6078        assert_eq!(prim_array.len(), 3);
6079        assert_eq!(prim_array.value(0), 100);
6080        assert!(prim_array.is_null(1));
6081        assert_eq!(prim_array.value(2), 101);
6082        Ok(())
6083    }
6084
6085    #[test]
6086    fn scalar_large_list_to_array() -> Result<()> {
6087        let values = vec![
6088            ScalarValue::UInt64(Some(100)),
6089            ScalarValue::UInt64(None),
6090            ScalarValue::UInt64(Some(101)),
6091        ];
6092        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
6093        assert_eq!(list_array.len(), 1);
6094        assert_eq!(list_array.values().len(), 3);
6095
6096        let prim_array_ref = list_array.value(0);
6097        let prim_array = as_uint64_array(&prim_array_ref)?;
6098        assert_eq!(prim_array.len(), 3);
6099        assert_eq!(prim_array.value(0), 100);
6100        assert!(prim_array.is_null(1));
6101        assert_eq!(prim_array.value(2), 101);
6102        Ok(())
6103    }
6104
6105    /// Creates array directly and via ScalarValue and ensures they are the same
6106    macro_rules! check_scalar_iter {
6107        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6108            let scalars: Vec<_> =
6109                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
6110
6111            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6112
6113            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6114
6115            assert_eq!(&array, &expected);
6116        }};
6117    }
6118
6119    /// Creates array directly and via ScalarValue and ensures they are the same
6120    /// but for variants that carry a timezone field.
6121    macro_rules! check_scalar_iter_tz {
6122        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6123            let scalars: Vec<_> = $INPUT
6124                .iter()
6125                .map(|v| ScalarValue::$SCALAR_T(*v, None))
6126                .collect();
6127
6128            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6129
6130            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6131
6132            assert_eq!(&array, &expected);
6133        }};
6134    }
6135
6136    /// Creates array directly and via ScalarValue and ensures they
6137    /// are the same, for string  arrays
6138    macro_rules! check_scalar_iter_string {
6139        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6140            let scalars: Vec<_> = $INPUT
6141                .iter()
6142                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
6143                .collect();
6144
6145            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6146
6147            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6148
6149            assert_eq!(&array, &expected);
6150        }};
6151    }
6152
6153    /// Creates array directly and via ScalarValue and ensures they
6154    /// are the same, for binary arrays
6155    macro_rules! check_scalar_iter_binary {
6156        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6157            let scalars: Vec<_> = $INPUT
6158                .iter()
6159                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
6160                .collect();
6161
6162            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6163
6164            let expected: $ARRAYTYPE =
6165                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
6166
6167            let expected: ArrayRef = Arc::new(expected);
6168
6169            assert_eq!(&array, &expected);
6170        }};
6171    }
6172
6173    #[test]
6174    // despite clippy claiming they are useless, the code doesn't compile otherwise.
6175    #[allow(clippy::useless_vec)]
6176    fn scalar_iter_to_array_boolean() {
6177        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
6178        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
6179        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
6180
6181        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
6182        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
6183        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
6184        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
6185
6186        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
6187        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
6188        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
6189        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
6190
6191        check_scalar_iter_tz!(
6192            TimestampSecond,
6193            TimestampSecondArray,
6194            vec![Some(1), None, Some(3)]
6195        );
6196        check_scalar_iter_tz!(
6197            TimestampMillisecond,
6198            TimestampMillisecondArray,
6199            vec![Some(1), None, Some(3)]
6200        );
6201        check_scalar_iter_tz!(
6202            TimestampMicrosecond,
6203            TimestampMicrosecondArray,
6204            vec![Some(1), None, Some(3)]
6205        );
6206        check_scalar_iter_tz!(
6207            TimestampNanosecond,
6208            TimestampNanosecondArray,
6209            vec![Some(1), None, Some(3)]
6210        );
6211
6212        check_scalar_iter_string!(
6213            Utf8,
6214            StringArray,
6215            vec![Some("foo"), None, Some("bar")]
6216        );
6217        check_scalar_iter_string!(
6218            LargeUtf8,
6219            LargeStringArray,
6220            vec![Some("foo"), None, Some("bar")]
6221        );
6222        check_scalar_iter_binary!(
6223            Binary,
6224            BinaryArray,
6225            vec![Some(b"foo"), None, Some(b"bar")]
6226        );
6227        check_scalar_iter_binary!(
6228            LargeBinary,
6229            LargeBinaryArray,
6230            vec![Some(b"foo"), None, Some(b"bar")]
6231        );
6232    }
6233
6234    #[test]
6235    fn scalar_iter_to_array_empty() {
6236        let scalars = vec![] as Vec<ScalarValue>;
6237
6238        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
6239        assert!(
6240            result
6241                .to_string()
6242                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
6243            "{}",
6244            result
6245        );
6246    }
6247
6248    #[test]
6249    fn scalar_iter_to_dictionary() {
6250        fn make_val(v: Option<String>) -> ScalarValue {
6251            let key_type = DataType::Int32;
6252            let value = ScalarValue::Utf8(v);
6253            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
6254        }
6255
6256        let scalars = [
6257            make_val(Some("Foo".into())),
6258            make_val(None),
6259            make_val(Some("Bar".into())),
6260        ];
6261
6262        let array = ScalarValue::iter_to_array(scalars).unwrap();
6263        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
6264        let values_array = as_string_array(array.values()).unwrap();
6265
6266        let values = array
6267            .keys_iter()
6268            .map(|k| {
6269                k.map(|k| {
6270                    assert!(values_array.is_valid(k));
6271                    values_array.value(k)
6272                })
6273            })
6274            .collect::<Vec<_>>();
6275
6276        let expected = vec![Some("Foo"), None, Some("Bar")];
6277        assert_eq!(values, expected);
6278    }
6279
6280    #[test]
6281    fn scalar_iter_to_array_mismatched_types() {
6282        use ScalarValue::*;
6283        // If the scalar values are not all the correct type, error here
6284        let scalars = [Boolean(Some(true)), Int32(Some(5))];
6285
6286        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
6287        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
6288                "{}", result);
6289    }
6290
6291    #[test]
6292    fn scalar_try_from_array_null() {
6293        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
6294        let array: ArrayRef = Arc::new(array);
6295
6296        assert_eq!(
6297            ScalarValue::Int64(Some(33)),
6298            ScalarValue::try_from_array(&array, 0).unwrap()
6299        );
6300        assert_eq!(
6301            ScalarValue::Int64(None),
6302            ScalarValue::try_from_array(&array, 1).unwrap()
6303        );
6304    }
6305
6306    #[test]
6307    fn scalar_try_from_array_list_array_null() {
6308        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6309            Some(vec![Some(1), Some(2)]),
6310            None,
6311        ]);
6312
6313        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
6314        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
6315
6316        let data_type =
6317            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6318
6319        assert_eq!(non_null_list_scalar.data_type(), data_type);
6320        assert_eq!(null_list_scalar.data_type(), data_type);
6321    }
6322
6323    #[test]
6324    fn scalar_try_from_list_datatypes() {
6325        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
6326
6327        // Test for List
6328        let data_type = &DataType::List(Arc::clone(&inner_field));
6329        let scalar: ScalarValue = data_type.try_into().unwrap();
6330        let expected = ScalarValue::List(
6331            new_null_array(data_type, 1)
6332                .as_list::<i32>()
6333                .to_owned()
6334                .into(),
6335        );
6336        assert_eq!(expected, scalar);
6337        assert!(expected.is_null());
6338
6339        // Test for LargeList
6340        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
6341        let scalar: ScalarValue = data_type.try_into().unwrap();
6342        let expected = ScalarValue::LargeList(
6343            new_null_array(data_type, 1)
6344                .as_list::<i64>()
6345                .to_owned()
6346                .into(),
6347        );
6348        assert_eq!(expected, scalar);
6349        assert!(expected.is_null());
6350
6351        // Test for FixedSizeList(5)
6352        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
6353        let scalar: ScalarValue = data_type.try_into().unwrap();
6354        let expected = ScalarValue::FixedSizeList(
6355            new_null_array(data_type, 1)
6356                .as_fixed_size_list()
6357                .to_owned()
6358                .into(),
6359        );
6360        assert_eq!(expected, scalar);
6361        assert!(expected.is_null());
6362    }
6363
6364    #[test]
6365    fn scalar_try_from_list_of_list() {
6366        let data_type = DataType::List(Arc::new(Field::new_list_field(
6367            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6368            true,
6369        )));
6370        let data_type = &data_type;
6371        let scalar: ScalarValue = data_type.try_into().unwrap();
6372
6373        let expected = ScalarValue::List(
6374            new_null_array(
6375                &DataType::List(Arc::new(Field::new_list_field(
6376                    DataType::List(Arc::new(Field::new_list_field(
6377                        DataType::Int32,
6378                        true,
6379                    ))),
6380                    true,
6381                ))),
6382                1,
6383            )
6384            .as_list::<i32>()
6385            .to_owned()
6386            .into(),
6387        );
6388
6389        assert_eq!(expected, scalar)
6390    }
6391
6392    #[test]
6393    fn scalar_try_from_not_equal_list_nested_list() {
6394        let list_data_type =
6395            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6396        let data_type = &list_data_type;
6397        let list_scalar: ScalarValue = data_type.try_into().unwrap();
6398
6399        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
6400            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6401            true,
6402        )));
6403        let data_type = &nested_list_data_type;
6404        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
6405
6406        assert_ne!(list_scalar, nested_list_scalar);
6407    }
6408
6409    #[test]
6410    fn scalar_try_from_dict_datatype() {
6411        let data_type =
6412            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
6413        let data_type = &data_type;
6414        let expected = ScalarValue::Dictionary(
6415            Box::new(DataType::Int8),
6416            Box::new(ScalarValue::Utf8(None)),
6417        );
6418        assert_eq!(expected, data_type.try_into().unwrap())
6419    }
6420
6421    #[test]
6422    fn size_of_scalar() {
6423        // Since ScalarValues are used in a non trivial number of places,
6424        // making it larger means significant more memory consumption
6425        // per distinct value.
6426        //
6427        // Thus this test ensures that no code change makes ScalarValue larger
6428        //
6429        // The alignment requirements differ across architectures and
6430        // thus the size of the enum appears to as well
6431
6432        // The value may also change depending on rust version
6433        assert_eq!(size_of::<ScalarValue>(), 64);
6434    }
6435
6436    #[test]
6437    fn memory_size() {
6438        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
6439        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
6440        let sv_size = sv.size();
6441
6442        let mut v = Vec::with_capacity(10);
6443        // do NOT clone `sv` here because this may shrink the vector capacity
6444        v.push(sv);
6445        assert_eq!(v.capacity(), 10);
6446        assert_eq!(
6447            ScalarValue::size_of_vec(&v),
6448            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
6449        );
6450
6451        let mut s = HashSet::with_capacity(0);
6452        // do NOT clone `sv` here because this may shrink the vector capacity
6453        s.insert(v.pop().unwrap());
6454        // hashsets may easily grow during insert, so capacity is dynamic
6455        let s_capacity = s.capacity();
6456        assert_eq!(
6457            ScalarValue::size_of_hashset(&s),
6458            size_of::<HashSet<ScalarValue>>()
6459                + ((s_capacity - 1) * size_of::<ScalarValue>())
6460                + sv_size,
6461        );
6462    }
6463
6464    #[test]
6465    fn scalar_eq_array() {
6466        // Validate that eq_array has the same semantics as ScalarValue::eq
6467        macro_rules! make_typed_vec {
6468            ($INPUT:expr, $TYPE:ident) => {{
6469                $INPUT
6470                    .iter()
6471                    .map(|v| v.map(|v| v as $TYPE))
6472                    .collect::<Vec<_>>()
6473            }};
6474        }
6475
6476        let bool_vals = [Some(true), None, Some(false)];
6477        let f32_vals = [Some(-1.0), None, Some(1.0)];
6478        let f64_vals = make_typed_vec!(f32_vals, f64);
6479
6480        let i8_vals = [Some(-1), None, Some(1)];
6481        let i16_vals = make_typed_vec!(i8_vals, i16);
6482        let i32_vals = make_typed_vec!(i8_vals, i32);
6483        let i64_vals = make_typed_vec!(i8_vals, i64);
6484
6485        let u8_vals = [Some(0), None, Some(1)];
6486        let u16_vals = make_typed_vec!(u8_vals, u16);
6487        let u32_vals = make_typed_vec!(u8_vals, u32);
6488        let u64_vals = make_typed_vec!(u8_vals, u64);
6489
6490        let str_vals = [Some("foo"), None, Some("bar")];
6491
6492        let interval_dt_vals = [
6493            Some(IntervalDayTime::MINUS_ONE),
6494            None,
6495            Some(IntervalDayTime::ONE),
6496        ];
6497        let interval_mdn_vals = [
6498            Some(IntervalMonthDayNano::MINUS_ONE),
6499            None,
6500            Some(IntervalMonthDayNano::ONE),
6501        ];
6502
6503        /// Test each value in `scalar` with the corresponding element
6504        /// at `array`. Assumes each element is unique (aka not equal
6505        /// with all other indexes)
6506        #[derive(Debug)]
6507        struct TestCase {
6508            array: ArrayRef,
6509            scalars: Vec<ScalarValue>,
6510        }
6511
6512        /// Create a test case for casing the input to the specified array type
6513        macro_rules! make_test_case {
6514            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
6515                TestCase {
6516                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6517                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
6518                }
6519            }};
6520
6521            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
6522                let tz = $TZ;
6523                TestCase {
6524                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6525                    scalars: $INPUT
6526                        .iter()
6527                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
6528                        .collect(),
6529                }
6530            }};
6531        }
6532
6533        macro_rules! make_str_test_case {
6534            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
6535                TestCase {
6536                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
6537                    scalars: $INPUT
6538                        .iter()
6539                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
6540                        .collect(),
6541                }
6542            }};
6543        }
6544
6545        macro_rules! make_binary_test_case {
6546            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
6547                TestCase {
6548                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
6549                    scalars: $INPUT
6550                        .iter()
6551                        .map(|v| {
6552                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
6553                        })
6554                        .collect(),
6555                }
6556            }};
6557        }
6558
6559        /// create a test case for DictionaryArray<$INDEX_TY>
6560        macro_rules! make_str_dict_test_case {
6561            ($INPUT:expr, $INDEX_TY:ident) => {{
6562                TestCase {
6563                    array: Arc::new(
6564                        $INPUT
6565                            .iter()
6566                            .cloned()
6567                            .collect::<DictionaryArray<$INDEX_TY>>(),
6568                    ),
6569                    scalars: $INPUT
6570                        .iter()
6571                        .map(|v| {
6572                            ScalarValue::Dictionary(
6573                                Box::new($INDEX_TY::DATA_TYPE),
6574                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
6575                            )
6576                        })
6577                        .collect(),
6578                }
6579            }};
6580        }
6581
6582        let cases = vec![
6583            make_test_case!(bool_vals, BooleanArray, Boolean),
6584            make_test_case!(f32_vals, Float32Array, Float32),
6585            make_test_case!(f64_vals, Float64Array, Float64),
6586            make_test_case!(i8_vals, Int8Array, Int8),
6587            make_test_case!(i16_vals, Int16Array, Int16),
6588            make_test_case!(i32_vals, Int32Array, Int32),
6589            make_test_case!(i64_vals, Int64Array, Int64),
6590            make_test_case!(u8_vals, UInt8Array, UInt8),
6591            make_test_case!(u16_vals, UInt16Array, UInt16),
6592            make_test_case!(u32_vals, UInt32Array, UInt32),
6593            make_test_case!(u64_vals, UInt64Array, UInt64),
6594            make_str_test_case!(str_vals, StringArray, Utf8),
6595            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
6596            make_binary_test_case!(str_vals, BinaryArray, Binary),
6597            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
6598            make_test_case!(i32_vals, Date32Array, Date32),
6599            make_test_case!(i64_vals, Date64Array, Date64),
6600            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
6601            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
6602            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
6603            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
6604            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
6605            make_test_case!(
6606                i64_vals,
6607                TimestampSecondArray,
6608                TimestampSecond,
6609                Some("UTC".into())
6610            ),
6611            make_test_case!(
6612                i64_vals,
6613                TimestampMillisecondArray,
6614                TimestampMillisecond,
6615                None
6616            ),
6617            make_test_case!(
6618                i64_vals,
6619                TimestampMillisecondArray,
6620                TimestampMillisecond,
6621                Some("UTC".into())
6622            ),
6623            make_test_case!(
6624                i64_vals,
6625                TimestampMicrosecondArray,
6626                TimestampMicrosecond,
6627                None
6628            ),
6629            make_test_case!(
6630                i64_vals,
6631                TimestampMicrosecondArray,
6632                TimestampMicrosecond,
6633                Some("UTC".into())
6634            ),
6635            make_test_case!(
6636                i64_vals,
6637                TimestampNanosecondArray,
6638                TimestampNanosecond,
6639                None
6640            ),
6641            make_test_case!(
6642                i64_vals,
6643                TimestampNanosecondArray,
6644                TimestampNanosecond,
6645                Some("UTC".into())
6646            ),
6647            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
6648            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
6649            make_test_case!(
6650                interval_mdn_vals,
6651                IntervalMonthDayNanoArray,
6652                IntervalMonthDayNano
6653            ),
6654            make_str_dict_test_case!(str_vals, Int8Type),
6655            make_str_dict_test_case!(str_vals, Int16Type),
6656            make_str_dict_test_case!(str_vals, Int32Type),
6657            make_str_dict_test_case!(str_vals, Int64Type),
6658            make_str_dict_test_case!(str_vals, UInt8Type),
6659            make_str_dict_test_case!(str_vals, UInt16Type),
6660            make_str_dict_test_case!(str_vals, UInt32Type),
6661            make_str_dict_test_case!(str_vals, UInt64Type),
6662        ];
6663
6664        for case in cases {
6665            println!("**** Test Case *****");
6666            let TestCase { array, scalars } = case;
6667            println!("Input array type: {}", array.data_type());
6668            println!("Input scalars: {scalars:#?}");
6669            assert_eq!(array.len(), scalars.len());
6670
6671            for (index, scalar) in scalars.into_iter().enumerate() {
6672                assert!(
6673                    scalar
6674                        .eq_array(&array, index)
6675                        .expect("Failed to compare arrays"),
6676                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
6677                );
6678
6679                // test that all other elements are *not* equal
6680                for other_index in 0..array.len() {
6681                    if index != other_index {
6682                        assert!(
6683                            !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
6684                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
6685                        );
6686                    }
6687                }
6688            }
6689        }
6690    }
6691
6692    #[test]
6693    fn scalar_partial_ordering() {
6694        use ScalarValue::*;
6695
6696        assert_eq!(
6697            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
6698            Some(Ordering::Greater)
6699        );
6700        assert_eq!(
6701            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
6702            Some(Ordering::Less)
6703        );
6704        assert_eq!(
6705            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
6706            Some(Ordering::Equal)
6707        );
6708        // For different data type, `partial_cmp` returns None.
6709        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
6710        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
6711
6712        assert_eq!(
6713            ScalarValue::from(vec![
6714                ("A", ScalarValue::from(1.0)),
6715                ("B", ScalarValue::from("Z")),
6716            ])
6717            .partial_cmp(&ScalarValue::from(vec![
6718                ("A", ScalarValue::from(2.0)),
6719                ("B", ScalarValue::from("A")),
6720            ])),
6721            Some(Ordering::Less)
6722        );
6723
6724        // For different struct fields, `partial_cmp` returns None.
6725        assert_eq!(
6726            ScalarValue::from(vec![
6727                ("A", ScalarValue::from(1.0)),
6728                ("B", ScalarValue::from("Z")),
6729            ])
6730            .partial_cmp(&ScalarValue::from(vec![
6731                ("a", ScalarValue::from(2.0)),
6732                ("b", ScalarValue::from("A")),
6733            ])),
6734            None
6735        );
6736    }
6737
6738    #[test]
6739    fn test_scalar_value_from_string() {
6740        let scalar = ScalarValue::from("foo");
6741        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
6742        let scalar = ScalarValue::from("foo".to_string());
6743        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
6744        let scalar = ScalarValue::from_str("foo").unwrap();
6745        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
6746    }
6747
6748    #[test]
6749    fn test_scalar_struct() {
6750        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
6751        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
6752        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
6753
6754        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
6755        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
6756        let field_d = Arc::new(Field::new(
6757            "D",
6758            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
6759            false,
6760        ));
6761
6762        let struct_array = StructArray::from(vec![
6763            (
6764                Arc::clone(&field_e),
6765                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
6766            ),
6767            (
6768                Arc::clone(&field_f),
6769                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
6770            ),
6771        ]);
6772
6773        let struct_array = StructArray::from(vec![
6774            (
6775                Arc::clone(&field_a),
6776                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
6777            ),
6778            (
6779                Arc::clone(&field_b),
6780                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
6781            ),
6782            (
6783                Arc::clone(&field_c),
6784                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
6785            ),
6786            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
6787        ]);
6788        let scalar = ScalarValue::Struct(Arc::new(struct_array));
6789
6790        let array = scalar
6791            .to_array_of_size(2)
6792            .expect("Failed to convert to array of size");
6793
6794        let expected = Arc::new(StructArray::from(vec![
6795            (
6796                Arc::clone(&field_a),
6797                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
6798            ),
6799            (
6800                Arc::clone(&field_b),
6801                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
6802            ),
6803            (
6804                Arc::clone(&field_c),
6805                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
6806            ),
6807            (
6808                Arc::clone(&field_d),
6809                Arc::new(StructArray::from(vec![
6810                    (
6811                        Arc::clone(&field_e),
6812                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
6813                    ),
6814                    (
6815                        Arc::clone(&field_f),
6816                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
6817                    ),
6818                ])) as ArrayRef,
6819            ),
6820        ])) as ArrayRef;
6821
6822        assert_eq!(&array, &expected);
6823
6824        // Construct from second element of ArrayRef
6825        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
6826        assert_eq!(constructed, scalar);
6827
6828        // None version
6829        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
6830        assert!(none_scalar.is_null());
6831        assert_eq!(
6832            format!("{none_scalar:?}"),
6833            String::from("Struct({A:,B:,C:,D:})")
6834        );
6835
6836        // Construct with convenience From<Vec<(&str, ScalarValue)>>
6837        let constructed = ScalarValue::from(vec![
6838            ("A", ScalarValue::from(23)),
6839            ("B", ScalarValue::from(false)),
6840            ("C", ScalarValue::from("Hello")),
6841            (
6842                "D",
6843                ScalarValue::from(vec![
6844                    ("e", ScalarValue::from(2i16)),
6845                    ("f", ScalarValue::from(3i64)),
6846                ]),
6847            ),
6848        ]);
6849        assert_eq!(constructed, scalar);
6850
6851        // Build Array from Vec of structs
6852        let scalars = vec![
6853            ScalarValue::from(vec![
6854                ("A", ScalarValue::from(23)),
6855                ("B", ScalarValue::from(false)),
6856                ("C", ScalarValue::from("Hello")),
6857                (
6858                    "D",
6859                    ScalarValue::from(vec![
6860                        ("e", ScalarValue::from(2i16)),
6861                        ("f", ScalarValue::from(3i64)),
6862                    ]),
6863                ),
6864            ]),
6865            ScalarValue::from(vec![
6866                ("A", ScalarValue::from(7)),
6867                ("B", ScalarValue::from(true)),
6868                ("C", ScalarValue::from("World")),
6869                (
6870                    "D",
6871                    ScalarValue::from(vec![
6872                        ("e", ScalarValue::from(4i16)),
6873                        ("f", ScalarValue::from(5i64)),
6874                    ]),
6875                ),
6876            ]),
6877            ScalarValue::from(vec![
6878                ("A", ScalarValue::from(-1000)),
6879                ("B", ScalarValue::from(true)),
6880                ("C", ScalarValue::from("!!!!!")),
6881                (
6882                    "D",
6883                    ScalarValue::from(vec![
6884                        ("e", ScalarValue::from(6i16)),
6885                        ("f", ScalarValue::from(7i64)),
6886                    ]),
6887                ),
6888            ]),
6889        ];
6890        let array = ScalarValue::iter_to_array(scalars).unwrap();
6891
6892        let expected = Arc::new(StructArray::from(vec![
6893            (
6894                Arc::clone(&field_a),
6895                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
6896            ),
6897            (
6898                Arc::clone(&field_b),
6899                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
6900            ),
6901            (
6902                Arc::clone(&field_c),
6903                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
6904            ),
6905            (
6906                Arc::clone(&field_d),
6907                Arc::new(StructArray::from(vec![
6908                    (
6909                        Arc::clone(&field_e),
6910                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
6911                    ),
6912                    (
6913                        Arc::clone(&field_f),
6914                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
6915                    ),
6916                ])) as ArrayRef,
6917            ),
6918        ])) as ArrayRef;
6919
6920        assert_eq!(&array, &expected);
6921    }
6922
6923    #[test]
6924    fn round_trip() {
6925        // Each array type should be able to round tripped through a scalar
6926        let cases: Vec<ArrayRef> = vec![
6927            // int
6928            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
6929            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
6930            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
6931            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
6932            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
6933            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
6934            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
6935            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
6936            // bool
6937            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
6938            // float
6939            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
6940            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
6941            // string array
6942            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
6943            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
6944            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
6945            // string dictionary
6946            {
6947                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
6948                builder.append("foo").unwrap();
6949                builder.append_null();
6950                builder.append("bar").unwrap();
6951                Arc::new(builder.finish())
6952            },
6953            // binary array
6954            Arc::new(BinaryArray::from_iter(vec![
6955                Some(b"foo"),
6956                None,
6957                Some(b"bar"),
6958            ])),
6959            Arc::new(LargeBinaryArray::from_iter(vec![
6960                Some(b"foo"),
6961                None,
6962                Some(b"bar"),
6963            ])),
6964            Arc::new(BinaryViewArray::from_iter(vec![
6965                Some(b"foo"),
6966                None,
6967                Some(b"bar"),
6968            ])),
6969            // timestamp
6970            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
6971            Arc::new(TimestampMillisecondArray::from(vec![
6972                Some(1),
6973                None,
6974                Some(3),
6975            ])),
6976            Arc::new(TimestampMicrosecondArray::from(vec![
6977                Some(1),
6978                None,
6979                Some(3),
6980            ])),
6981            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
6982            // timestamp with timezone
6983            Arc::new(
6984                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
6985                    .with_timezone_opt(Some("UTC")),
6986            ),
6987            Arc::new(
6988                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
6989                    .with_timezone_opt(Some("UTC")),
6990            ),
6991            Arc::new(
6992                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
6993                    .with_timezone_opt(Some("UTC")),
6994            ),
6995            Arc::new(
6996                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
6997                    .with_timezone_opt(Some("UTC")),
6998            ),
6999            // date
7000            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
7001            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
7002            // time
7003            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
7004            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
7005            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
7006            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
7007            // null array
7008            Arc::new(NullArray::new(3)),
7009            // dense union
7010            {
7011                let mut builder = UnionBuilder::new_dense();
7012                builder.append::<Int32Type>("a", 1).unwrap();
7013                builder.append::<Float64Type>("b", 3.4).unwrap();
7014                Arc::new(builder.build().unwrap())
7015            },
7016            // sparse union
7017            {
7018                let mut builder = UnionBuilder::new_sparse();
7019                builder.append::<Int32Type>("a", 1).unwrap();
7020                builder.append::<Float64Type>("b", 3.4).unwrap();
7021                Arc::new(builder.build().unwrap())
7022            },
7023            // list array
7024            {
7025                let values_builder = StringBuilder::new();
7026                let mut builder = ListBuilder::new(values_builder);
7027                // [A, B]
7028                builder.values().append_value("A");
7029                builder.values().append_value("B");
7030                builder.append(true);
7031                // [ ] (empty list)
7032                builder.append(true);
7033                // Null
7034                builder.values().append_value("?"); // irrelevant
7035                builder.append(false);
7036                Arc::new(builder.finish())
7037            },
7038            // large list array
7039            {
7040                let values_builder = StringBuilder::new();
7041                let mut builder = LargeListBuilder::new(values_builder);
7042                // [A, B]
7043                builder.values().append_value("A");
7044                builder.values().append_value("B");
7045                builder.append(true);
7046                // [ ] (empty list)
7047                builder.append(true);
7048                // Null
7049                builder.append(false);
7050                Arc::new(builder.finish())
7051            },
7052            // fixed size list array
7053            {
7054                let values_builder = Int32Builder::new();
7055                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
7056
7057                //  [[0, 1, 2], null, [3, null, 5]
7058                builder.values().append_value(0);
7059                builder.values().append_value(1);
7060                builder.values().append_value(2);
7061                builder.append(true);
7062                builder.values().append_null();
7063                builder.values().append_null();
7064                builder.values().append_null();
7065                builder.append(false);
7066                builder.values().append_value(3);
7067                builder.values().append_null();
7068                builder.values().append_value(5);
7069                builder.append(true);
7070                Arc::new(builder.finish())
7071            },
7072            // map
7073            {
7074                let string_builder = StringBuilder::new();
7075                let int_builder = Int32Builder::with_capacity(4);
7076
7077                let mut builder = MapBuilder::new(None, string_builder, int_builder);
7078                // {"joe": 1}
7079                builder.keys().append_value("joe");
7080                builder.values().append_value(1);
7081                builder.append(true).unwrap();
7082                // {}
7083                builder.append(true).unwrap();
7084                // null
7085                builder.append(false).unwrap();
7086
7087                Arc::new(builder.finish())
7088            },
7089        ];
7090
7091        for arr in cases {
7092            round_trip_through_scalar(arr);
7093        }
7094    }
7095
7096    /// for each row in `arr`:
7097    /// 1. convert to a `ScalarValue`
7098    /// 2. Convert `ScalarValue` back to an `ArrayRef`
7099    /// 3. Compare the original array (sliced) and new array for equality
7100    fn round_trip_through_scalar(arr: ArrayRef) {
7101        for i in 0..arr.len() {
7102            // convert Scalar --> Array
7103            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
7104            let array = scalar.to_array_of_size(1).unwrap();
7105            assert_eq!(array.len(), 1);
7106            assert_eq!(array.data_type(), arr.data_type());
7107            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
7108        }
7109    }
7110
7111    #[test]
7112    fn test_scalar_union_sparse() {
7113        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7114        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7115        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7116        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7117
7118        let mut values_a = vec![None; 6];
7119        values_a[0] = Some(42);
7120        let mut values_b = vec![None; 6];
7121        values_b[1] = Some(true);
7122        let mut values_c = vec![None; 6];
7123        values_c[2] = Some("foo");
7124        let children: Vec<ArrayRef> = vec![
7125            Arc::new(Int32Array::from(values_a)),
7126            Arc::new(BooleanArray::from(values_b)),
7127            Arc::new(StringArray::from(values_c)),
7128        ];
7129
7130        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7131        let array: ArrayRef = Arc::new(
7132            UnionArray::try_new(fields.clone(), type_ids, None, children)
7133                .expect("UnionArray"),
7134        );
7135
7136        let expected = [
7137            (0, ScalarValue::from(42)),
7138            (1, ScalarValue::from(true)),
7139            (2, ScalarValue::from("foo")),
7140            (0, ScalarValue::Int32(None)),
7141            (1, ScalarValue::Boolean(None)),
7142            (2, ScalarValue::Utf8(None)),
7143        ];
7144
7145        for (i, (ti, value)) in expected.into_iter().enumerate() {
7146            let is_null = value.is_null();
7147            let value = Some((ti, Box::new(value)));
7148            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
7149            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7150
7151            assert_eq!(
7152                actual, expected,
7153                "[{i}] {actual} was not equal to {expected}"
7154            );
7155
7156            assert!(
7157                expected.eq_array(&array, i).expect("eq_array"),
7158                "[{i}] {expected}.eq_array was false"
7159            );
7160
7161            if is_null {
7162                assert!(actual.is_null(), "[{i}] {actual} was not null")
7163            }
7164        }
7165    }
7166
7167    #[test]
7168    fn test_scalar_union_dense() {
7169        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7170        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7171        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7172        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7173        let children: Vec<ArrayRef> = vec![
7174            Arc::new(Int32Array::from(vec![Some(42), None])),
7175            Arc::new(BooleanArray::from(vec![Some(true), None])),
7176            Arc::new(StringArray::from(vec![Some("foo"), None])),
7177        ];
7178
7179        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7180        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
7181        let array: ArrayRef = Arc::new(
7182            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
7183                .expect("UnionArray"),
7184        );
7185
7186        let expected = [
7187            (0, ScalarValue::from(42)),
7188            (1, ScalarValue::from(true)),
7189            (2, ScalarValue::from("foo")),
7190            (0, ScalarValue::Int32(None)),
7191            (1, ScalarValue::Boolean(None)),
7192            (2, ScalarValue::Utf8(None)),
7193        ];
7194
7195        for (i, (ti, value)) in expected.into_iter().enumerate() {
7196            let is_null = value.is_null();
7197            let value = Some((ti, Box::new(value)));
7198            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
7199            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7200
7201            assert_eq!(
7202                actual, expected,
7203                "[{i}] {actual} was not equal to {expected}"
7204            );
7205
7206            assert!(
7207                expected.eq_array(&array, i).expect("eq_array"),
7208                "[{i}] {expected}.eq_array was false"
7209            );
7210
7211            if is_null {
7212                assert!(actual.is_null(), "[{i}] {actual} was not null")
7213            }
7214        }
7215    }
7216
7217    #[test]
7218    fn test_lists_in_struct() {
7219        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
7220        let field_primitive_list = Arc::new(Field::new(
7221            "primitive_list",
7222            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7223            false,
7224        ));
7225
7226        // Define primitive list scalars
7227        let l0 =
7228            ScalarValue::List(Arc::new(
7229                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7230                    Some(1),
7231                    Some(2),
7232                    Some(3),
7233                ])]),
7234            ));
7235        let l1 =
7236            ScalarValue::List(Arc::new(
7237                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7238                    Some(4),
7239                    Some(5),
7240                ])]),
7241            ));
7242        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
7243            Int32Type,
7244            _,
7245            _,
7246        >(vec![Some(vec![Some(6)])])));
7247
7248        // Define struct scalars
7249        let s0 = ScalarValue::from(vec![
7250            ("A", ScalarValue::from("First")),
7251            ("primitive_list", l0),
7252        ]);
7253
7254        let s1 = ScalarValue::from(vec![
7255            ("A", ScalarValue::from("Second")),
7256            ("primitive_list", l1),
7257        ]);
7258
7259        let s2 = ScalarValue::from(vec![
7260            ("A", ScalarValue::from("Third")),
7261            ("primitive_list", l2),
7262        ]);
7263
7264        // iter_to_array for struct scalars
7265        let array =
7266            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
7267
7268        let array = as_struct_array(&array).unwrap();
7269        let expected = StructArray::from(vec![
7270            (
7271                Arc::clone(&field_a),
7272                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
7273            ),
7274            (
7275                Arc::clone(&field_primitive_list),
7276                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
7277                    Some(vec![Some(1), Some(2), Some(3)]),
7278                    Some(vec![Some(4), Some(5)]),
7279                    Some(vec![Some(6)]),
7280                ])),
7281            ),
7282        ]);
7283
7284        assert_eq!(array, &expected);
7285
7286        // Define list-of-structs scalars
7287
7288        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
7289        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
7290
7291        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
7292        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
7293
7294        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
7295        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
7296
7297        // iter_to_array for list-of-struct
7298        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
7299        let array = array.as_list::<i32>();
7300
7301        // Construct expected array with array builders
7302        let field_a_builder = StringBuilder::with_capacity(4, 1024);
7303        let primitive_value_builder = Int32Array::builder(8);
7304        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
7305
7306        let element_builder = StructBuilder::new(
7307            vec![field_a, field_primitive_list],
7308            vec![
7309                Box::new(field_a_builder),
7310                Box::new(field_primitive_list_builder),
7311            ],
7312        );
7313
7314        let mut list_builder = ListBuilder::new(element_builder);
7315
7316        list_builder
7317            .values()
7318            .field_builder::<StringBuilder>(0)
7319            .unwrap()
7320            .append_value("First");
7321        list_builder
7322            .values()
7323            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7324            .unwrap()
7325            .values()
7326            .append_value(1);
7327        list_builder
7328            .values()
7329            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7330            .unwrap()
7331            .values()
7332            .append_value(2);
7333        list_builder
7334            .values()
7335            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7336            .unwrap()
7337            .values()
7338            .append_value(3);
7339        list_builder
7340            .values()
7341            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7342            .unwrap()
7343            .append(true);
7344        list_builder.values().append(true);
7345
7346        list_builder
7347            .values()
7348            .field_builder::<StringBuilder>(0)
7349            .unwrap()
7350            .append_value("Second");
7351        list_builder
7352            .values()
7353            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7354            .unwrap()
7355            .values()
7356            .append_value(4);
7357        list_builder
7358            .values()
7359            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7360            .unwrap()
7361            .values()
7362            .append_value(5);
7363        list_builder
7364            .values()
7365            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7366            .unwrap()
7367            .append(true);
7368        list_builder.values().append(true);
7369        list_builder.append(true);
7370
7371        list_builder
7372            .values()
7373            .field_builder::<StringBuilder>(0)
7374            .unwrap()
7375            .append_value("Third");
7376        list_builder
7377            .values()
7378            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7379            .unwrap()
7380            .values()
7381            .append_value(6);
7382        list_builder
7383            .values()
7384            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7385            .unwrap()
7386            .append(true);
7387        list_builder.values().append(true);
7388        list_builder.append(true);
7389
7390        list_builder
7391            .values()
7392            .field_builder::<StringBuilder>(0)
7393            .unwrap()
7394            .append_value("Second");
7395        list_builder
7396            .values()
7397            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7398            .unwrap()
7399            .values()
7400            .append_value(4);
7401        list_builder
7402            .values()
7403            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7404            .unwrap()
7405            .values()
7406            .append_value(5);
7407        list_builder
7408            .values()
7409            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7410            .unwrap()
7411            .append(true);
7412        list_builder.values().append(true);
7413        list_builder.append(true);
7414
7415        let expected = list_builder.finish();
7416
7417        assert_eq!(array, &expected);
7418    }
7419
7420    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
7421        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
7422        ListArray::new(
7423            Arc::new(Field::new_list_field(
7424                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7425                true,
7426            )),
7427            OffsetBuffer::<i32>::from_lengths([1]),
7428            Arc::new(a1),
7429            None,
7430        )
7431    }
7432
7433    #[test]
7434    fn test_nested_lists() {
7435        // Define inner list scalars
7436        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
7437        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
7438        let arr3 = build_2d_list(vec![Some(6)]);
7439
7440        let array = ScalarValue::iter_to_array(vec![
7441            ScalarValue::List(Arc::new(arr1)),
7442            ScalarValue::List(Arc::new(arr2)),
7443            ScalarValue::List(Arc::new(arr3)),
7444        ])
7445        .unwrap();
7446        let array = array.as_list::<i32>();
7447
7448        // Construct expected array with array builders
7449        let inner_builder = Int32Array::builder(6);
7450        let middle_builder = ListBuilder::new(inner_builder);
7451        let mut outer_builder = ListBuilder::new(middle_builder);
7452
7453        outer_builder.values().values().append_value(1);
7454        outer_builder.values().values().append_value(2);
7455        outer_builder.values().values().append_value(3);
7456        outer_builder.values().append(true);
7457        outer_builder.append(true);
7458
7459        outer_builder.values().values().append_value(4);
7460        outer_builder.values().values().append_value(5);
7461        outer_builder.values().append(true);
7462        outer_builder.append(true);
7463
7464        outer_builder.values().values().append_value(6);
7465        outer_builder.values().append(true);
7466        outer_builder.append(true);
7467
7468        let expected = outer_builder.finish();
7469
7470        assert_eq!(array, &expected);
7471    }
7472
7473    #[test]
7474    fn scalar_timestamp_ns_utc_timezone() {
7475        let scalar = ScalarValue::TimestampNanosecond(
7476            Some(1599566400000000000),
7477            Some("UTC".into()),
7478        );
7479
7480        assert_eq!(
7481            scalar.data_type(),
7482            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7483        );
7484
7485        let array = scalar.to_array().expect("Failed to convert to array");
7486        assert_eq!(array.len(), 1);
7487        assert_eq!(
7488            array.data_type(),
7489            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7490        );
7491
7492        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
7493        assert_eq!(
7494            new_scalar.data_type(),
7495            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7496        );
7497    }
7498
7499    #[test]
7500    fn cast_round_trip() {
7501        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
7502        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
7503
7504        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
7505
7506        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
7507
7508        check_scalar_cast(
7509            ScalarValue::from("foo"),
7510            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
7511        );
7512
7513        check_scalar_cast(
7514            ScalarValue::Utf8(None),
7515            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
7516        );
7517
7518        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
7519        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
7520        check_scalar_cast(
7521            ScalarValue::from("larger than 12 bytes string"),
7522            DataType::Utf8View,
7523        );
7524        check_scalar_cast(
7525            {
7526                let element_field =
7527                    Arc::new(Field::new("element", DataType::Int32, true));
7528
7529                let mut builder =
7530                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
7531                builder.append_value([Some(1)]);
7532                builder.append(true);
7533
7534                ScalarValue::List(Arc::new(builder.finish()))
7535            },
7536            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
7537        );
7538        check_scalar_cast(
7539            {
7540                let element_field =
7541                    Arc::new(Field::new("element", DataType::Int32, true));
7542
7543                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
7544                    .with_field(element_field);
7545                builder.values().append_value(1);
7546                builder.append(true);
7547
7548                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
7549            },
7550            DataType::FixedSizeList(
7551                Arc::new(Field::new("element", DataType::Int64, true)),
7552                1,
7553            ),
7554        );
7555        check_scalar_cast(
7556            {
7557                let element_field =
7558                    Arc::new(Field::new("element", DataType::Int32, true));
7559
7560                let mut builder =
7561                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
7562                builder.append_value([Some(1)]);
7563                builder.append(true);
7564
7565                ScalarValue::LargeList(Arc::new(builder.finish()))
7566            },
7567            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
7568        );
7569    }
7570
7571    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
7572    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
7573        // convert from scalar --> Array to call cast
7574        let scalar_array = scalar.to_array().expect("Failed to convert to array");
7575        // cast the actual value
7576        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
7577
7578        // turn it back to a scalar
7579        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
7580        assert_eq!(cast_scalar.data_type(), desired_type);
7581
7582        // Some time later the "cast" scalar is turned back into an array:
7583        let array = cast_scalar
7584            .to_array_of_size(10)
7585            .expect("Failed to convert to array of size");
7586
7587        // The datatype should be "Dictionary" but is actually Utf8!!!
7588        assert_eq!(array.data_type(), &desired_type)
7589    }
7590
7591    #[test]
7592    fn test_scalar_negative() -> Result<()> {
7593        // positive test
7594        let value = ScalarValue::Int32(Some(12));
7595        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
7596        let value = ScalarValue::Int32(None);
7597        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
7598
7599        // negative test
7600        let value = ScalarValue::UInt8(Some(12));
7601        assert!(value.arithmetic_negate().is_err());
7602        let value = ScalarValue::Boolean(None);
7603        assert!(value.arithmetic_negate().is_err());
7604        Ok(())
7605    }
7606
7607    #[test]
7608    #[allow(arithmetic_overflow)] // we want to test them
7609    fn test_scalar_negative_overflows() -> Result<()> {
7610        macro_rules! test_overflow_on_value {
7611            ($($val:expr),* $(,)?) => {$(
7612                {
7613                    let value: ScalarValue = $val;
7614                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
7615                    let root_err = err.find_root();
7616                    match  root_err{
7617                        DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
7618                        _ => return Err(err),
7619                    };
7620                }
7621            )*};
7622        }
7623        test_overflow_on_value!(
7624            // the integers
7625            i8::MIN.into(),
7626            i16::MIN.into(),
7627            i32::MIN.into(),
7628            i64::MIN.into(),
7629            // for decimals, only value needs to be tested
7630            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
7631            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
7632            // interval, check all possible values
7633            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
7634            ScalarValue::new_interval_dt(i32::MIN, 999),
7635            ScalarValue::new_interval_dt(1, i32::MIN),
7636            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
7637            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
7638            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
7639            // tz doesn't matter when negating
7640            ScalarValue::TimestampSecond(Some(i64::MIN), None),
7641            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
7642            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
7643            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
7644        );
7645
7646        let float_cases = [
7647            (
7648                ScalarValue::Float16(Some(f16::MIN)),
7649                ScalarValue::Float16(Some(f16::MAX)),
7650            ),
7651            (
7652                ScalarValue::Float16(Some(f16::MAX)),
7653                ScalarValue::Float16(Some(f16::MIN)),
7654            ),
7655            (f32::MIN.into(), f32::MAX.into()),
7656            (f32::MAX.into(), f32::MIN.into()),
7657            (f64::MIN.into(), f64::MAX.into()),
7658            (f64::MAX.into(), f64::MIN.into()),
7659        ];
7660        // skip float 16 because they aren't supported
7661        for (test, expected) in float_cases.into_iter().skip(2) {
7662            assert_eq!(test.arithmetic_negate()?, expected);
7663        }
7664        Ok(())
7665    }
7666
7667    #[test]
7668    fn f16_test_overflow() {
7669        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
7670        let cases = [
7671            (
7672                ScalarValue::Float16(Some(f16::MIN)),
7673                ScalarValue::Float16(Some(f16::MAX)),
7674            ),
7675            (
7676                ScalarValue::Float16(Some(f16::MAX)),
7677                ScalarValue::Float16(Some(f16::MIN)),
7678            ),
7679        ];
7680
7681        for (test, expected) in cases {
7682            assert_eq!(test.arithmetic_negate().unwrap(), expected);
7683        }
7684    }
7685
7686    macro_rules! expect_operation_error {
7687        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
7688            #[test]
7689            fn $TEST_NAME() {
7690                let lhs = ScalarValue::UInt64(Some(12));
7691                let rhs = ScalarValue::Int32(Some(-3));
7692                match lhs.$FUNCTION(&rhs) {
7693                    Ok(_result) => {
7694                        panic!(
7695                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
7696                            lhs, rhs
7697                        );
7698                    }
7699                    Err(e) => {
7700                        let error_message = e.to_string();
7701                        assert!(
7702                            error_message.contains($EXPECTED_ERROR),
7703                            "Expected error '{}' not found in actual error '{}'",
7704                            $EXPECTED_ERROR,
7705                            error_message
7706                        );
7707                    }
7708                }
7709            }
7710        };
7711    }
7712
7713    expect_operation_error!(
7714        expect_add_error,
7715        add,
7716        "Invalid arithmetic operation: UInt64 + Int32"
7717    );
7718    expect_operation_error!(
7719        expect_sub_error,
7720        sub,
7721        "Invalid arithmetic operation: UInt64 - Int32"
7722    );
7723
7724    macro_rules! decimal_op_test_cases {
7725    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
7726            $(
7727
7728                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
7729                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
7730                let result = left.$OPERATION(&right).unwrap();
7731                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
7732
7733            )+
7734        };
7735    }
7736
7737    #[test]
7738    fn decimal_operations() {
7739        decimal_op_test_cases!(
7740            add,
7741            [
7742                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
7743                // test sum decimal with diff scale
7744                [
7745                    Some(123),
7746                    10,
7747                    3,
7748                    Some(124),
7749                    10,
7750                    2,
7751                    Some(123 + 124 * 10_i128.pow(1)),
7752                    12,
7753                    3
7754                ],
7755                // diff precision and scale for decimal data type
7756                [
7757                    Some(123),
7758                    10,
7759                    2,
7760                    Some(124),
7761                    11,
7762                    3,
7763                    Some(123 * 10_i128.pow(3 - 2) + 124),
7764                    12,
7765                    3
7766                ]
7767            ]
7768        );
7769    }
7770
7771    #[test]
7772    fn decimal_operations_with_nulls() {
7773        decimal_op_test_cases!(
7774            add,
7775            [
7776                // Case: (None, Some, 0)
7777                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
7778                // Case: (Some, None, 0)
7779                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
7780                // Case: (Some, None, _) + Side=False
7781                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
7782                // Case: (None, Some, _) + Side=False
7783                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
7784                // Case: (Some, None, _) + Side=True
7785                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
7786                // Case: (None, Some, _) + Side=True
7787                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
7788            ]
7789        );
7790    }
7791
7792    #[test]
7793    fn test_scalar_distance() {
7794        let cases = [
7795            // scalar (lhs), scalar (rhs), expected distance
7796            // ---------------------------------------------
7797            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
7798            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
7799            (
7800                ScalarValue::Int16(Some(-5)),
7801                ScalarValue::Int16(Some(5)),
7802                10,
7803            ),
7804            (
7805                ScalarValue::Int16(Some(5)),
7806                ScalarValue::Int16(Some(-5)),
7807                10,
7808            ),
7809            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
7810            (
7811                ScalarValue::Int32(Some(-5)),
7812                ScalarValue::Int32(Some(-10)),
7813                5,
7814            ),
7815            (
7816                ScalarValue::Int64(Some(-10)),
7817                ScalarValue::Int64(Some(-5)),
7818                5,
7819            ),
7820            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
7821            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
7822            (
7823                ScalarValue::UInt16(Some(5)),
7824                ScalarValue::UInt16(Some(10)),
7825                5,
7826            ),
7827            (
7828                ScalarValue::UInt32(Some(10)),
7829                ScalarValue::UInt32(Some(5)),
7830                5,
7831            ),
7832            (
7833                ScalarValue::UInt64(Some(5)),
7834                ScalarValue::UInt64(Some(10)),
7835                5,
7836            ),
7837            (
7838                ScalarValue::Float16(Some(f16::from_f32(1.1))),
7839                ScalarValue::Float16(Some(f16::from_f32(1.9))),
7840                1,
7841            ),
7842            (
7843                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
7844                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
7845                4,
7846            ),
7847            (
7848                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
7849                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
7850                4,
7851            ),
7852            (
7853                ScalarValue::Float32(Some(1.0)),
7854                ScalarValue::Float32(Some(2.0)),
7855                1,
7856            ),
7857            (
7858                ScalarValue::Float32(Some(2.0)),
7859                ScalarValue::Float32(Some(1.0)),
7860                1,
7861            ),
7862            (
7863                ScalarValue::Float64(Some(0.0)),
7864                ScalarValue::Float64(Some(0.0)),
7865                0,
7866            ),
7867            (
7868                ScalarValue::Float64(Some(-5.0)),
7869                ScalarValue::Float64(Some(-10.0)),
7870                5,
7871            ),
7872            (
7873                ScalarValue::Float64(Some(-10.0)),
7874                ScalarValue::Float64(Some(-5.0)),
7875                5,
7876            ),
7877            // Floats are currently special cased to f64/f32 and the result is rounded
7878            // rather than ceiled/floored. In the future we might want to take a mode
7879            // which specified the rounding behavior.
7880            (
7881                ScalarValue::Float32(Some(1.2)),
7882                ScalarValue::Float32(Some(1.3)),
7883                0,
7884            ),
7885            (
7886                ScalarValue::Float32(Some(1.1)),
7887                ScalarValue::Float32(Some(1.9)),
7888                1,
7889            ),
7890            (
7891                ScalarValue::Float64(Some(-5.3)),
7892                ScalarValue::Float64(Some(-9.2)),
7893                4,
7894            ),
7895            (
7896                ScalarValue::Float64(Some(-5.3)),
7897                ScalarValue::Float64(Some(-9.7)),
7898                4,
7899            ),
7900            (
7901                ScalarValue::Float64(Some(-5.3)),
7902                ScalarValue::Float64(Some(-9.9)),
7903                5,
7904            ),
7905            (
7906                ScalarValue::Decimal128(Some(10), 1, 0),
7907                ScalarValue::Decimal128(Some(5), 1, 0),
7908                5,
7909            ),
7910            (
7911                ScalarValue::Decimal128(Some(5), 1, 0),
7912                ScalarValue::Decimal128(Some(10), 1, 0),
7913                5,
7914            ),
7915            (
7916                ScalarValue::Decimal256(Some(10.into()), 1, 0),
7917                ScalarValue::Decimal256(Some(5.into()), 1, 0),
7918                5,
7919            ),
7920            (
7921                ScalarValue::Decimal256(Some(5.into()), 1, 0),
7922                ScalarValue::Decimal256(Some(10.into()), 1, 0),
7923                5,
7924            ),
7925        ];
7926        for (lhs, rhs, expected) in cases.iter() {
7927            let distance = lhs.distance(rhs).unwrap();
7928            assert_eq!(distance, *expected);
7929        }
7930    }
7931
7932    #[test]
7933    fn test_distance_none() {
7934        let cases = [
7935            (
7936                ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
7937                ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0),
7938            ),
7939            (
7940                ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
7941                ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0),
7942            ),
7943        ];
7944        for (lhs, rhs) in cases.iter() {
7945            let distance = lhs.distance(rhs);
7946            assert!(distance.is_none(), "{lhs} vs {rhs}");
7947        }
7948    }
7949
7950    #[test]
7951    fn test_scalar_distance_invalid() {
7952        let cases = [
7953            // scalar (lhs), scalar (rhs)
7954            // --------------------------
7955            // Same type but with nulls
7956            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
7957            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
7958            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
7959            // Different type
7960            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
7961            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
7962            (
7963                ScalarValue::Float16(Some(f16::from_f32(1.0))),
7964                ScalarValue::Float32(Some(1.0)),
7965            ),
7966            (
7967                ScalarValue::Float16(Some(f16::from_f32(1.0))),
7968                ScalarValue::Int32(Some(1)),
7969            ),
7970            (
7971                ScalarValue::Float64(Some(1.1)),
7972                ScalarValue::Float32(Some(2.2)),
7973            ),
7974            (
7975                ScalarValue::UInt64(Some(777)),
7976                ScalarValue::Int32(Some(111)),
7977            ),
7978            // Different types with nulls
7979            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
7980            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
7981            // Unsupported types
7982            (ScalarValue::from("foo"), ScalarValue::from("bar")),
7983            (
7984                ScalarValue::Boolean(Some(true)),
7985                ScalarValue::Boolean(Some(false)),
7986            ),
7987            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
7988            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
7989            (
7990                ScalarValue::Decimal128(Some(123), 5, 5),
7991                ScalarValue::Decimal128(Some(120), 5, 3),
7992            ),
7993            (
7994                ScalarValue::Decimal128(Some(123), 5, 5),
7995                ScalarValue::Decimal128(Some(120), 3, 5),
7996            ),
7997            (
7998                ScalarValue::Decimal256(Some(123.into()), 5, 5),
7999                ScalarValue::Decimal256(Some(120.into()), 3, 5),
8000            ),
8001            // Distance 2 * 2^50 is larger than usize
8002            (
8003                ScalarValue::Decimal256(
8004                    Some(i256::from_parts(0, 2_i64.pow(50).into())),
8005                    1,
8006                    0,
8007                ),
8008                ScalarValue::Decimal256(
8009                    Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
8010                    1,
8011                    0,
8012                ),
8013            ),
8014            // Distance overflow
8015            (
8016                ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
8017                ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
8018            ),
8019        ];
8020        for (lhs, rhs) in cases {
8021            let distance = lhs.distance(&rhs);
8022            assert!(distance.is_none());
8023        }
8024    }
8025
8026    #[test]
8027    fn test_scalar_interval_negate() {
8028        let cases = [
8029            (
8030                ScalarValue::new_interval_ym(1, 12),
8031                ScalarValue::new_interval_ym(-1, -12),
8032            ),
8033            (
8034                ScalarValue::new_interval_dt(1, 999),
8035                ScalarValue::new_interval_dt(-1, -999),
8036            ),
8037            (
8038                ScalarValue::new_interval_mdn(12, 15, 123_456),
8039                ScalarValue::new_interval_mdn(-12, -15, -123_456),
8040            ),
8041        ];
8042        for (expr, expected) in cases.iter() {
8043            let result = expr.arithmetic_negate().unwrap();
8044            assert_eq!(*expected, result, "-expr:{expr:?}");
8045        }
8046    }
8047
8048    #[test]
8049    fn test_scalar_interval_add() {
8050        let cases = [
8051            (
8052                ScalarValue::new_interval_ym(1, 12),
8053                ScalarValue::new_interval_ym(1, 12),
8054                ScalarValue::new_interval_ym(2, 24),
8055            ),
8056            (
8057                ScalarValue::new_interval_dt(1, 999),
8058                ScalarValue::new_interval_dt(1, 999),
8059                ScalarValue::new_interval_dt(2, 1998),
8060            ),
8061            (
8062                ScalarValue::new_interval_mdn(12, 15, 123_456),
8063                ScalarValue::new_interval_mdn(12, 15, 123_456),
8064                ScalarValue::new_interval_mdn(24, 30, 246_912),
8065            ),
8066        ];
8067        for (lhs, rhs, expected) in cases.iter() {
8068            let result = lhs.add(rhs).unwrap();
8069            let result_commute = rhs.add(lhs).unwrap();
8070            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
8071            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
8072        }
8073    }
8074
8075    #[test]
8076    fn test_scalar_interval_sub() {
8077        let cases = [
8078            (
8079                ScalarValue::new_interval_ym(1, 12),
8080                ScalarValue::new_interval_ym(1, 12),
8081                ScalarValue::new_interval_ym(0, 0),
8082            ),
8083            (
8084                ScalarValue::new_interval_dt(1, 999),
8085                ScalarValue::new_interval_dt(1, 999),
8086                ScalarValue::new_interval_dt(0, 0),
8087            ),
8088            (
8089                ScalarValue::new_interval_mdn(12, 15, 123_456),
8090                ScalarValue::new_interval_mdn(12, 15, 123_456),
8091                ScalarValue::new_interval_mdn(0, 0, 0),
8092            ),
8093        ];
8094        for (lhs, rhs, expected) in cases.iter() {
8095            let result = lhs.sub(rhs).unwrap();
8096            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
8097        }
8098    }
8099
8100    #[test]
8101    fn timestamp_op_random_tests() {
8102        // timestamp1 + (or -) interval = timestamp2
8103        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
8104        let sample_size = 1000;
8105        let timestamps1 = get_random_timestamps(sample_size);
8106        let intervals = get_random_intervals(sample_size);
8107        // ts(sec) + interval(ns) = ts(sec); however,
8108        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
8109        // timestamps are more precise than intervals in tests.
8110        for (idx, ts1) in timestamps1.iter().enumerate() {
8111            if idx % 2 == 0 {
8112                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
8113                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
8114                assert_eq!(ts1, &back);
8115            } else {
8116                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
8117                let back = timestamp2.add(intervals[idx].clone()).unwrap();
8118                assert_eq!(ts1, &back);
8119            };
8120        }
8121    }
8122
8123    #[test]
8124    fn test_struct_nulls() {
8125        let fields_b = Fields::from(vec![
8126            Field::new("ba", DataType::UInt64, true),
8127            Field::new("bb", DataType::UInt64, true),
8128        ]);
8129        let fields = Fields::from(vec![
8130            Field::new("a", DataType::UInt64, true),
8131            Field::new("b", DataType::Struct(fields_b.clone()), true),
8132        ]);
8133
8134        let struct_value = vec![
8135            (
8136                Arc::clone(&fields[0]),
8137                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
8138            ),
8139            (
8140                Arc::clone(&fields[1]),
8141                Arc::new(StructArray::from(vec![
8142                    (
8143                        Arc::clone(&fields_b[0]),
8144                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
8145                    ),
8146                    (
8147                        Arc::clone(&fields_b[1]),
8148                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
8149                    ),
8150                ])) as ArrayRef,
8151            ),
8152        ];
8153
8154        let struct_value_with_nulls = vec![
8155            (
8156                Arc::clone(&fields[0]),
8157                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
8158            ),
8159            (
8160                Arc::clone(&fields[1]),
8161                Arc::new(StructArray::from((
8162                    vec![
8163                        (
8164                            Arc::clone(&fields_b[0]),
8165                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
8166                        ),
8167                        (
8168                            Arc::clone(&fields_b[1]),
8169                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
8170                        ),
8171                    ],
8172                    Buffer::from(&[0]),
8173                ))) as ArrayRef,
8174            ),
8175        ];
8176
8177        let scalars = vec![
8178            // all null
8179            ScalarValue::Struct(Arc::new(StructArray::from((
8180                struct_value.clone(),
8181                Buffer::from(&[0]),
8182            )))),
8183            // field 1 valid, field 2 null
8184            ScalarValue::Struct(Arc::new(StructArray::from((
8185                struct_value_with_nulls.clone(),
8186                Buffer::from(&[1]),
8187            )))),
8188            // all valid
8189            ScalarValue::Struct(Arc::new(StructArray::from((
8190                struct_value.clone(),
8191                Buffer::from(&[1]),
8192            )))),
8193        ];
8194
8195        let check_array = |array| {
8196            let is_null = is_null(&array).unwrap();
8197            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
8198
8199            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
8200            let formatted = formatted.split('\n').collect::<Vec<_>>();
8201            let expected = vec![
8202                "+---------------------------+",
8203                "| col                       |",
8204                "+---------------------------+",
8205                "|                           |",
8206                "| {a: 1, b: }               |",
8207                "| {a: 1, b: {ba: 2, bb: 3}} |",
8208                "+---------------------------+",
8209            ];
8210            assert_eq!(
8211                formatted, expected,
8212                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
8213            );
8214        };
8215
8216        // test `ScalarValue::iter_to_array`
8217        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
8218        check_array(array);
8219
8220        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
8221        let arrays = scalars
8222            .iter()
8223            .map(ScalarValue::to_array)
8224            .collect::<Result<Vec<_>>>()
8225            .expect("Failed to convert to array");
8226        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
8227        let array = arrow::compute::concat(&arrays).unwrap();
8228        check_array(array);
8229    }
8230
8231    #[test]
8232    fn test_struct_display() {
8233        let field_a = Field::new("a", DataType::Int32, true);
8234        let field_b = Field::new("b", DataType::Utf8, true);
8235
8236        let s = ScalarStructBuilder::new()
8237            .with_scalar(field_a, ScalarValue::from(1i32))
8238            .with_scalar(field_b, ScalarValue::Utf8(None))
8239            .build()
8240            .unwrap();
8241
8242        assert_eq!(s.to_string(), "{a:1,b:}");
8243        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
8244
8245        let ScalarValue::Struct(arr) = s else {
8246            panic!("Expected struct");
8247        };
8248
8249        //verify compared to arrow display
8250        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8251        assert_snapshot!(batches_to_string(&[batch]), @r"
8252        +-------------+
8253        | s           |
8254        +-------------+
8255        | {a: 1, b: } |
8256        +-------------+
8257        ");
8258    }
8259
8260    #[test]
8261    fn test_null_bug() {
8262        let field_a = Field::new("a", DataType::Int32, true);
8263        let field_b = Field::new("b", DataType::Int32, true);
8264        let fields = Fields::from(vec![field_a, field_b]);
8265
8266        let array_a = Arc::new(Int32Array::from_iter_values([1]));
8267        let array_b = Arc::new(Int32Array::from_iter_values([2]));
8268        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
8269
8270        let mut not_nulls = NullBufferBuilder::new(1);
8271
8272        not_nulls.append_non_null();
8273
8274        let ar = StructArray::new(fields, arrays, not_nulls.finish());
8275        let s = ScalarValue::Struct(Arc::new(ar));
8276
8277        assert_eq!(s.to_string(), "{a:1,b:2}");
8278        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
8279
8280        let ScalarValue::Struct(arr) = s else {
8281            panic!("Expected struct");
8282        };
8283
8284        //verify compared to arrow display
8285        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8286        assert_snapshot!(batches_to_string(&[batch]), @r"
8287        +--------------+
8288        | s            |
8289        +--------------+
8290        | {a: 1, b: 2} |
8291        +--------------+
8292        ");
8293    }
8294
8295    #[test]
8296    fn test_display_date64_large_values() {
8297        assert_eq!(
8298            format!("{}", ScalarValue::Date64(Some(790179464505))),
8299            "1995-01-15"
8300        );
8301        // This used to panic, see https://github.com/apache/arrow-rs/issues/7728
8302        assert_eq!(
8303            format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
8304            ""
8305        );
8306    }
8307
8308    #[test]
8309    fn test_struct_display_null() {
8310        let fields = vec![Field::new("a", DataType::Int32, false)];
8311        let s = ScalarStructBuilder::new_null(fields);
8312        assert_eq!(s.to_string(), "NULL");
8313
8314        let ScalarValue::Struct(arr) = s else {
8315            panic!("Expected struct");
8316        };
8317
8318        //verify compared to arrow display
8319        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8320
8321        assert_snapshot!(batches_to_string(&[batch]), @r"
8322        +---+
8323        | s |
8324        +---+
8325        |   |
8326        +---+
8327        ");
8328    }
8329
8330    #[test]
8331    fn test_map_display_and_debug() {
8332        let string_builder = StringBuilder::new();
8333        let int_builder = Int32Builder::with_capacity(4);
8334        let mut builder = MapBuilder::new(None, string_builder, int_builder);
8335        builder.keys().append_value("joe");
8336        builder.values().append_value(1);
8337        builder.append(true).unwrap();
8338
8339        builder.keys().append_value("blogs");
8340        builder.values().append_value(2);
8341        builder.keys().append_value("foo");
8342        builder.values().append_value(4);
8343        builder.append(true).unwrap();
8344        builder.append(true).unwrap();
8345        builder.append(false).unwrap();
8346
8347        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
8348
8349        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
8350        assert_eq!(
8351            format!("{map_value:?}"),
8352            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
8353        );
8354
8355        let ScalarValue::Map(arr) = map_value else {
8356            panic!("Expected map");
8357        };
8358
8359        //verify compared to arrow display
8360        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
8361        assert_snapshot!(batches_to_string(&[batch]), @r"
8362        +--------------------+
8363        | m                  |
8364        +--------------------+
8365        | {joe: 1}           |
8366        | {blogs: 2, foo: 4} |
8367        | {}                 |
8368        |                    |
8369        +--------------------+
8370        ");
8371    }
8372
8373    #[test]
8374    fn test_binary_display() {
8375        let no_binary_value = ScalarValue::Binary(None);
8376        assert_eq!(format!("{no_binary_value}"), "NULL");
8377        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
8378        assert_eq!(format!("{single_binary_value}"), "2A");
8379        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
8380        assert_eq!(format!("{small_binary_value}"), "010203");
8381        let large_binary_value =
8382            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8383        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8384
8385        let no_binary_value = ScalarValue::BinaryView(None);
8386        assert_eq!(format!("{no_binary_value}"), "NULL");
8387        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
8388        assert_eq!(format!("{small_binary_value}"), "010203");
8389        let large_binary_value =
8390            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8391        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8392
8393        let no_binary_value = ScalarValue::LargeBinary(None);
8394        assert_eq!(format!("{no_binary_value}"), "NULL");
8395        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
8396        assert_eq!(format!("{small_binary_value}"), "010203");
8397        let large_binary_value =
8398            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8399        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8400
8401        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
8402        assert_eq!(format!("{no_binary_value}"), "NULL");
8403        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
8404        assert_eq!(format!("{small_binary_value}"), "010203");
8405        let large_binary_value = ScalarValue::FixedSizeBinary(
8406            11,
8407            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
8408        );
8409        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8410    }
8411
8412    #[test]
8413    fn test_binary_debug() {
8414        let no_binary_value = ScalarValue::Binary(None);
8415        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
8416        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
8417        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
8418        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
8419        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
8420        let large_binary_value =
8421            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8422        assert_eq!(
8423            format!("{large_binary_value:?}"),
8424            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
8425        );
8426
8427        let no_binary_value = ScalarValue::BinaryView(None);
8428        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
8429        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
8430        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
8431        let large_binary_value =
8432            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8433        assert_eq!(
8434            format!("{large_binary_value:?}"),
8435            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
8436        );
8437
8438        let no_binary_value = ScalarValue::LargeBinary(None);
8439        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
8440        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
8441        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
8442        let large_binary_value =
8443            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8444        assert_eq!(
8445            format!("{large_binary_value:?}"),
8446            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
8447        );
8448
8449        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
8450        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
8451        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
8452        assert_eq!(
8453            format!("{small_binary_value:?}"),
8454            "FixedSizeBinary(3, \"1,2,3\")"
8455        );
8456        let large_binary_value = ScalarValue::FixedSizeBinary(
8457            11,
8458            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
8459        );
8460        assert_eq!(
8461            format!("{large_binary_value:?}"),
8462            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
8463        );
8464    }
8465
8466    #[test]
8467    fn test_build_timestamp_millisecond_list() {
8468        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
8469        let arr = ScalarValue::new_list_nullable(
8470            &values,
8471            &DataType::Timestamp(TimeUnit::Millisecond, None),
8472        );
8473        assert_eq!(1, arr.len());
8474    }
8475
8476    #[test]
8477    fn test_newlist_timestamp_zone() {
8478        let s: &'static str = "UTC";
8479        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
8480        let arr = ScalarValue::new_list_nullable(
8481            &values,
8482            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
8483        );
8484        assert_eq!(1, arr.len());
8485        assert_eq!(
8486            arr.data_type(),
8487            &DataType::List(Arc::new(Field::new_list_field(
8488                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
8489                true,
8490            )))
8491        );
8492    }
8493
8494    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
8495        let vector_size = sample_size;
8496        let mut timestamp = vec![];
8497        let mut rng = rand::rng();
8498        for i in 0..vector_size {
8499            let year = rng.random_range(1995..=2050);
8500            let month = rng.random_range(1..=12);
8501            let day = rng.random_range(1..=28); // to exclude invalid dates
8502            let hour = rng.random_range(0..=23);
8503            let minute = rng.random_range(0..=59);
8504            let second = rng.random_range(0..=59);
8505            if i % 4 == 0 {
8506                timestamp.push(ScalarValue::TimestampSecond(
8507                    Some(
8508                        NaiveDate::from_ymd_opt(year, month, day)
8509                            .unwrap()
8510                            .and_hms_opt(hour, minute, second)
8511                            .unwrap()
8512                            .and_utc()
8513                            .timestamp(),
8514                    ),
8515                    None,
8516                ))
8517            } else if i % 4 == 1 {
8518                let millisec = rng.random_range(0..=999);
8519                timestamp.push(ScalarValue::TimestampMillisecond(
8520                    Some(
8521                        NaiveDate::from_ymd_opt(year, month, day)
8522                            .unwrap()
8523                            .and_hms_milli_opt(hour, minute, second, millisec)
8524                            .unwrap()
8525                            .and_utc()
8526                            .timestamp_millis(),
8527                    ),
8528                    None,
8529                ))
8530            } else if i % 4 == 2 {
8531                let microsec = rng.random_range(0..=999_999);
8532                timestamp.push(ScalarValue::TimestampMicrosecond(
8533                    Some(
8534                        NaiveDate::from_ymd_opt(year, month, day)
8535                            .unwrap()
8536                            .and_hms_micro_opt(hour, minute, second, microsec)
8537                            .unwrap()
8538                            .and_utc()
8539                            .timestamp_micros(),
8540                    ),
8541                    None,
8542                ))
8543            } else if i % 4 == 3 {
8544                let nanosec = rng.random_range(0..=999_999_999);
8545                timestamp.push(ScalarValue::TimestampNanosecond(
8546                    Some(
8547                        NaiveDate::from_ymd_opt(year, month, day)
8548                            .unwrap()
8549                            .and_hms_nano_opt(hour, minute, second, nanosec)
8550                            .unwrap()
8551                            .and_utc()
8552                            .timestamp_nanos_opt()
8553                            .unwrap(),
8554                    ),
8555                    None,
8556                ))
8557            }
8558        }
8559        timestamp
8560    }
8561
8562    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
8563        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
8564        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
8565
8566        let vector_size = sample_size;
8567        let mut intervals = vec![];
8568        let mut rng = rand::rng();
8569        const SECS_IN_ONE_DAY: i32 = 86_400;
8570        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
8571        for i in 0..vector_size {
8572            if i % 4 == 0 {
8573                let days = rng.random_range(0..5000);
8574                // to not break second precision
8575                let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
8576                intervals.push(ScalarValue::new_interval_dt(days, millis));
8577            } else if i % 4 == 1 {
8578                let days = rng.random_range(0..5000);
8579                let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
8580                intervals.push(ScalarValue::new_interval_dt(days, millisec));
8581            } else if i % 4 == 2 {
8582                let days = rng.random_range(0..5000);
8583                // to not break microsec precision
8584                let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
8585                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
8586            } else {
8587                let days = rng.random_range(0..5000);
8588                let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
8589                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
8590            }
8591        }
8592        intervals
8593    }
8594
8595    fn union_fields() -> UnionFields {
8596        [
8597            (0, Arc::new(Field::new("A", DataType::Int32, true))),
8598            (1, Arc::new(Field::new("B", DataType::Float64, true))),
8599        ]
8600        .into_iter()
8601        .collect()
8602    }
8603
8604    #[test]
8605    fn sparse_scalar_union_is_null() {
8606        let sparse_scalar = ScalarValue::Union(
8607            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
8608            union_fields(),
8609            UnionMode::Sparse,
8610        );
8611        assert!(sparse_scalar.is_null());
8612    }
8613
8614    #[test]
8615    fn dense_scalar_union_is_null() {
8616        let dense_scalar = ScalarValue::Union(
8617            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
8618            union_fields(),
8619            UnionMode::Dense,
8620        );
8621        assert!(dense_scalar.is_null());
8622    }
8623
8624    #[test]
8625    fn null_dictionary_scalar_produces_null_dictionary_array() {
8626        let dictionary_scalar = ScalarValue::Dictionary(
8627            Box::new(DataType::Int32),
8628            Box::new(ScalarValue::Null),
8629        );
8630        assert!(dictionary_scalar.is_null());
8631        let dictionary_array = dictionary_scalar.to_array().unwrap();
8632        assert!(dictionary_array.is_null(0));
8633    }
8634
8635    #[test]
8636    fn test_scalar_value_try_new_null() {
8637        let scalars = vec![
8638            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
8639            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
8640            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
8641            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
8642            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
8643            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
8644            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
8645            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
8646            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
8647            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
8648            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
8649            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
8650            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
8651            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
8652            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
8653            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
8654            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
8655            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
8656            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
8657            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
8658            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
8659            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
8660            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
8661            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
8662            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
8663            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
8664            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
8665            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
8666                .unwrap(),
8667            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
8668                .unwrap(),
8669            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
8670                .unwrap(),
8671            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
8672                .unwrap(),
8673            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
8674                .unwrap(),
8675            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
8676                .unwrap(),
8677            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
8678                .unwrap(),
8679            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
8680            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
8681                .unwrap(),
8682            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
8683            ScalarValue::try_new_null(&DataType::Null).unwrap(),
8684        ];
8685        assert!(scalars.iter().all(|s| s.is_null()));
8686
8687        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
8688        let map_field_ref = Arc::new(Field::new(
8689            "foo",
8690            DataType::Struct(Fields::from(vec![
8691                Field::new("bar", DataType::Utf8, true),
8692                Field::new("baz", DataType::Int32, true),
8693            ])),
8694            true,
8695        ));
8696        let scalars = [
8697            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
8698            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
8699                .unwrap(),
8700            ScalarValue::try_new_null(&DataType::FixedSizeList(
8701                Arc::clone(&field_ref),
8702                42,
8703            ))
8704            .unwrap(),
8705            ScalarValue::try_new_null(&DataType::Struct(
8706                vec![Arc::clone(&field_ref)].into(),
8707            ))
8708            .unwrap(),
8709            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
8710            ScalarValue::try_new_null(&DataType::Union(
8711                UnionFields::new(vec![42], vec![field_ref]),
8712                UnionMode::Dense,
8713            ))
8714            .unwrap(),
8715        ];
8716        assert!(scalars.iter().all(|s| s.is_null()));
8717    }
8718
8719    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
8720    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
8721    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
8722        let actual = actual.as_ref();
8723        let expected_prefix = expected_prefix.as_ref();
8724        assert!(
8725            actual.starts_with(expected_prefix),
8726            "Expected '{actual}' to start with '{expected_prefix}'"
8727        );
8728    }
8729
8730    #[test]
8731    fn test_new_default() {
8732        // Test numeric types
8733        assert_eq!(
8734            ScalarValue::new_default(&DataType::Int32).unwrap(),
8735            ScalarValue::Int32(Some(0))
8736        );
8737        assert_eq!(
8738            ScalarValue::new_default(&DataType::Float64).unwrap(),
8739            ScalarValue::Float64(Some(0.0))
8740        );
8741        assert_eq!(
8742            ScalarValue::new_default(&DataType::Boolean).unwrap(),
8743            ScalarValue::Boolean(Some(false))
8744        );
8745
8746        // Test string types
8747        assert_eq!(
8748            ScalarValue::new_default(&DataType::Utf8).unwrap(),
8749            ScalarValue::Utf8(Some("".to_string()))
8750        );
8751        assert_eq!(
8752            ScalarValue::new_default(&DataType::LargeUtf8).unwrap(),
8753            ScalarValue::LargeUtf8(Some("".to_string()))
8754        );
8755
8756        // Test binary types
8757        assert_eq!(
8758            ScalarValue::new_default(&DataType::Binary).unwrap(),
8759            ScalarValue::Binary(Some(vec![]))
8760        );
8761
8762        // Test fixed size binary
8763        assert_eq!(
8764            ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(),
8765            ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0]))
8766        );
8767
8768        // Test temporal types
8769        assert_eq!(
8770            ScalarValue::new_default(&DataType::Date32).unwrap(),
8771            ScalarValue::Date32(Some(0))
8772        );
8773        assert_eq!(
8774            ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(),
8775            ScalarValue::Time32Second(Some(0))
8776        );
8777
8778        // Test decimal types
8779        assert_eq!(
8780            ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(),
8781            ScalarValue::Decimal128(Some(0), 10, 2)
8782        );
8783
8784        // Test list type
8785        let list_field = Field::new_list_field(DataType::Int32, true);
8786        let list_result =
8787            ScalarValue::new_default(&DataType::List(Arc::new(list_field.clone())))
8788                .unwrap();
8789        match list_result {
8790            ScalarValue::List(arr) => {
8791                assert_eq!(arr.len(), 1);
8792                assert_eq!(arr.value_length(0), 0); // empty list
8793            }
8794            _ => panic!("Expected List"),
8795        }
8796
8797        // Test struct type
8798        let struct_fields = Fields::from(vec![
8799            Field::new("a", DataType::Int32, false),
8800            Field::new("b", DataType::Utf8, false),
8801        ]);
8802        let struct_result =
8803            ScalarValue::new_default(&DataType::Struct(struct_fields.clone())).unwrap();
8804        match struct_result {
8805            ScalarValue::Struct(arr) => {
8806                assert_eq!(arr.len(), 1);
8807                assert_eq!(arr.column(0).as_primitive::<Int32Type>().value(0), 0);
8808                assert_eq!(arr.column(1).as_string::<i32>().value(0), "");
8809            }
8810            _ => panic!("Expected Struct"),
8811        }
8812
8813        // Test union type
8814        let union_fields = UnionFields::new(
8815            vec![0, 1],
8816            vec![
8817                Field::new("i32", DataType::Int32, false),
8818                Field::new("f64", DataType::Float64, false),
8819            ],
8820        );
8821        let union_result = ScalarValue::new_default(&DataType::Union(
8822            union_fields.clone(),
8823            UnionMode::Sparse,
8824        ))
8825        .unwrap();
8826        match union_result {
8827            ScalarValue::Union(Some((type_id, value)), _, _) => {
8828                assert_eq!(type_id, 0);
8829                assert_eq!(*value, ScalarValue::Int32(Some(0)));
8830            }
8831            _ => panic!("Expected Union"),
8832        }
8833    }
8834
#[test]
fn test_scalar_min() {
    // Checks `ScalarValue::min` for a representative data type of each
    // family. Each table pairs a data type with the scalar expected back.

    // Integer and floating point types
    let numeric = [
        (DataType::Int8, ScalarValue::Int8(Some(i8::MIN))),
        (DataType::Int32, ScalarValue::Int32(Some(i32::MIN))),
        (DataType::UInt8, ScalarValue::UInt8(Some(0))),
        (DataType::UInt64, ScalarValue::UInt64(Some(0))),
        (
            DataType::Float32,
            ScalarValue::Float32(Some(f32::NEG_INFINITY)),
        ),
        (
            DataType::Float64,
            ScalarValue::Float64(Some(f64::NEG_INFINITY)),
        ),
    ];
    for (data_type, expected) in numeric {
        assert_eq!(ScalarValue::min(&data_type), Some(expected));
    }

    // Decimal types: precision and scale must be carried over unchanged
    let ScalarValue::Decimal128(Some(smallest), 5, 2) =
        ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap()
    else {
        panic!("Expected Decimal128")
    };
    assert_eq!(smallest, -99999); // -999.99 with scale 2

    // Temporal and duration types
    let temporal = [
        (DataType::Date32, ScalarValue::Date32(Some(i32::MIN))),
        (
            DataType::Time32(TimeUnit::Second),
            ScalarValue::Time32Second(Some(0)),
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
        ),
        (
            DataType::Duration(TimeUnit::Second),
            ScalarValue::DurationSecond(Some(i64::MIN)),
        ),
    ];
    for (data_type, expected) in temporal {
        assert_eq!(ScalarValue::min(&data_type), Some(expected));
    }

    // Unsupported types yield `None`
    let unsupported = [
        DataType::Utf8,
        DataType::Binary,
        DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
    ];
    for data_type in unsupported {
        assert_eq!(ScalarValue::min(&data_type), None);
    }
}
8906
#[test]
fn test_scalar_max() {
    // Checks `ScalarValue::max` for a representative data type of each
    // family. Each table pairs a data type with the scalar expected back.

    // Integer and floating point types
    let numeric = [
        (DataType::Int8, ScalarValue::Int8(Some(i8::MAX))),
        (DataType::Int32, ScalarValue::Int32(Some(i32::MAX))),
        (DataType::UInt8, ScalarValue::UInt8(Some(u8::MAX))),
        (DataType::UInt64, ScalarValue::UInt64(Some(u64::MAX))),
        (DataType::Float32, ScalarValue::Float32(Some(f32::INFINITY))),
        (DataType::Float64, ScalarValue::Float64(Some(f64::INFINITY))),
    ];
    for (data_type, expected) in numeric {
        assert_eq!(ScalarValue::max(&data_type), Some(expected));
    }

    // Decimal types: precision and scale must be carried over unchanged
    let ScalarValue::Decimal128(Some(largest), 5, 2) =
        ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap()
    else {
        panic!("Expected Decimal128")
    };
    assert_eq!(largest, 99999); // 999.99 with scale 2

    // Temporal and duration types
    let temporal = [
        (DataType::Date32, ScalarValue::Date32(Some(i32::MAX))),
        (
            DataType::Time32(TimeUnit::Second),
            ScalarValue::Time32Second(Some(86_399)), // 23:59:59
        ),
        (
            DataType::Time64(TimeUnit::Microsecond),
            ScalarValue::Time64Microsecond(Some(86_399_999_999)), // 23:59:59.999999
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            ScalarValue::TimestampNanosecond(Some(i64::MAX), None),
        ),
        (
            DataType::Duration(TimeUnit::Millisecond),
            ScalarValue::DurationMillisecond(Some(i64::MAX)),
        ),
    ];
    for (data_type, expected) in temporal {
        assert_eq!(ScalarValue::max(&data_type), Some(expected));
    }

    // Unsupported types yield `None`
    let unsupported = [
        DataType::Utf8,
        DataType::Binary,
        DataType::Struct(Fields::from(vec![Field::new(
            "field",
            DataType::Int32,
            true,
        )])),
    ];
    for data_type in unsupported {
        assert_eq!(ScalarValue::max(&data_type), None);
    }
}
8982
#[test]
fn test_min_max_float16() {
    // Float16 bounds are expected to be the two infinities.
    let ScalarValue::Float16(Some(lowest)) =
        ScalarValue::min(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16")
    };
    assert_eq!(lowest, f16::NEG_INFINITY);

    let ScalarValue::Float16(Some(highest)) =
        ScalarValue::max(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16")
    };
    assert_eq!(highest, f16::INFINITY);
}
9002
#[test]
fn test_new_default_interval() {
    // Every interval unit should default to its zero value.
    let cases = [
        (
            IntervalUnit::YearMonth,
            ScalarValue::IntervalYearMonth(Some(0)),
        ),
        (
            IntervalUnit::DayTime,
            ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO)),
        ),
        (
            IntervalUnit::MonthDayNano,
            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO)),
        ),
    ];

    for (unit, expected) in cases {
        let default = ScalarValue::new_default(&DataType::Interval(unit)).unwrap();
        assert_eq!(default, expected);
    }
}
9021
#[test]
fn test_min_max_with_timezone() {
    // The timezone attached to the requested timestamp type must be
    // present on the returned min/max scalar.
    let utc = Some(Arc::from("UTC"));

    // Lower bound, second resolution
    let second_type = DataType::Timestamp(TimeUnit::Second, utc.clone());
    let ScalarValue::TimestampSecond(Some(lowest), Some(lowest_tz)) =
        ScalarValue::min(&second_type).unwrap()
    else {
        panic!("Expected TimestampSecond with timezone")
    };
    assert_eq!(lowest, i64::MIN);
    assert_eq!(lowest_tz.as_ref(), "UTC");

    // Upper bound, millisecond resolution
    let millisecond_type = DataType::Timestamp(TimeUnit::Millisecond, utc.clone());
    let ScalarValue::TimestampMillisecond(Some(highest), Some(highest_tz)) =
        ScalarValue::max(&millisecond_type).unwrap()
    else {
        panic!("Expected TimestampMillisecond with timezone")
    };
    assert_eq!(highest, i64::MAX);
    assert_eq!(highest_tz.as_ref(), "UTC");
}
9048
#[test]
fn test_convert_array_to_scalar_vec() {
    // Exercises `ScalarValue::convert_array_to_scalar_vec` on list arrays,
    // including arrays whose offsets and null buffer disagree about what a
    // null slot "contains".

    /// Builds one expected non-null list entry from its optional elements.
    fn row(elems: &[Option<i64>]) -> Option<Vec<ScalarValue>> {
        Some(elems.iter().copied().map(ScalarValue::Int64).collect())
    }

    /// Builds a `ListArray` over the child values `[1, 2, 3, 4, 5, 6]` with
    /// the given offsets and validity (`true` = slot is non-null).
    fn list_over_one_to_six(offsets: Vec<i32>, validity: Vec<bool>) -> ListArray {
        ListArray::new(
            Field::new_list_field(DataType::Int64, true).into(),
            OffsetBuffer::new(offsets.into()),
            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
            Some(NullBuffer::from(validity)),
        )
    }

    // Cases 1 and 2 share the same logical content:
    // [[1, 2], NULL, [3, NULL, 4]]
    let nested = vec![
        Some(vec![Some(1), Some(2)]),
        None,
        Some(vec![Some(3), None, Some(4)]),
    ];
    let nested_expected = vec![
        row(&[Some(1), Some(2)]),
        None,
        row(&[Some(3), None, Some(4)]),
    ];

    // 1: Regular ListArray
    let list = ListArray::from_iter_primitive::<Int64Type, _, _>(nested.clone());
    let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap();
    assert_eq!(converted, nested_expected);

    // 2: Regular LargeListArray
    let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(nested);
    let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap();
    assert_eq!(converted, nested_expected);

    // 3: Funky (null slot has non-zero list offsets)
    // Offsets + Values looks like this: [[1, 2], [3, 4], [5]]
    // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
    // (the trailing child value 6 is not referenced by any offset)
    let funky = list_over_one_to_six(vec![0, 2, 4, 5], vec![true, false, true]);
    let converted = ScalarValue::convert_array_to_scalar_vec(&funky).unwrap();
    assert_eq!(
        converted,
        vec![row(&[Some(1), Some(2)]), None, row(&[Some(5)])]
    );

    // 4: Null slot with an empty offset range
    // Offsets + Values looks like this: [[1, 2], [], [3, 4, 5]]
    // But with NullBuffer it's like this: [[1, 2], NULL, [3, 4, 5]]
    // The converted result is: [[1, 2], None, [3, 4, 5]]
    let array4 = list_over_one_to_six(vec![0, 2, 2, 5], vec![true, false, true]);
    let converted = ScalarValue::convert_array_to_scalar_vec(&array4).unwrap();
    assert_eq!(
        converted,
        vec![
            row(&[Some(1), Some(2)]),
            None,
            row(&[Some(3), Some(4), Some(5)]),
        ]
    );

    // 5: Offsets + Values looks like this: [[1, 2], [], [3, 4, 5]]
    // Same as 4, but the middle array is not null, so after conversion it's empty.
    let array5 = list_over_one_to_six(vec![0, 2, 2, 5], vec![true, true, true]);
    let converted = ScalarValue::convert_array_to_scalar_vec(&array5).unwrap();
    assert_eq!(
        converted,
        vec![
            row(&[Some(1), Some(2)]),
            row(&[]),
            row(&[Some(3), Some(4), Some(5)]),
        ]
    );
}
9170}