1mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::hash::Hash;
30use std::hash::Hasher;
31use std::iter::repeat_n;
32use std::mem::{size_of, size_of_val};
33use std::str::FromStr;
34use std::sync::Arc;
35
36use crate::cast::{
37 as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
38 as_date64_array, as_decimal128_array, as_decimal256_array, as_decimal32_array,
39 as_decimal64_array, as_dictionary_array, as_duration_microsecond_array,
40 as_duration_millisecond_array, as_duration_nanosecond_array,
41 as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
42 as_float16_array, as_float32_array, as_float64_array, as_int16_array, as_int32_array,
43 as_int64_array, as_int8_array, as_interval_dt_array, as_interval_mdn_array,
44 as_interval_ym_array, as_large_binary_array, as_large_list_array,
45 as_large_string_array, as_string_array, as_string_view_array,
46 as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
47 as_time64_nanosecond_array, as_timestamp_microsecond_array,
48 as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
49 as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array,
50 as_uint8_array, as_union_array,
51};
52use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
53use crate::format::DEFAULT_CAST_OPTIONS;
54use crate::hash_utils::create_hashes;
55use crate::utils::SingleRowListArrayBuilder;
56use crate::{_internal_datafusion_err, arrow_datafusion_err};
57use arrow::array::{
58 new_empty_array, new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp,
59 ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
60 Date64Array, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
61 DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
62 DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
63 FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
64 Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
65 IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
66 LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait,
67 PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray,
68 Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
69 Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
70 TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
71 UInt64Array, UInt8Array, UnionArray,
72};
73use arrow::buffer::{BooleanBuffer, ScalarBuffer};
74use arrow::compute::kernels::cast::{cast_with_options, CastOptions};
75use arrow::compute::kernels::numeric::{
76 add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
77};
78use arrow::datatypes::{
79 i256, validate_decimal_precision_and_scale, ArrowDictionaryKeyType, ArrowNativeType,
80 ArrowTimestampType, DataType, Date32Type, Decimal128Type, Decimal256Type,
81 Decimal32Type, Decimal64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
82 Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano,
83 IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit,
84 TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
85 TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, UnionFields,
86 UnionMode, DECIMAL128_MAX_PRECISION,
87};
88use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
89use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
90use chrono::{Duration, NaiveDate};
91use half::f16;
92pub use struct_builder::ScalarStructBuilder;
93
/// A single, possibly-null value tagged with its logical type.
///
/// Most variants wrap an `Option`, where `None` is the typed null for that
/// variant (`try_new_null` builds exactly that form for each `DataType`).
/// Equality, ordering and hashing are implemented by hand further down in
/// this file rather than derived.
#[derive(Clone)]
pub enum ScalarValue {
    /// Untyped null; `try_new_null` maps `DataType::Null` to this variant.
    Null,
    /// Boolean value.
    Boolean(Option<bool>),
    /// 16-bit float.
    Float16(Option<f16>),
    /// 32-bit float.
    Float32(Option<f32>),
    /// 64-bit float.
    Float64(Option<f64>),
    /// Decimal stored in an `i32`: `(value, precision, scale)`.
    Decimal32(Option<i32>, u8, i8),
    /// Decimal stored in an `i64`: `(value, precision, scale)`.
    Decimal64(Option<i64>, u8, i8),
    /// Decimal stored in an `i128`: `(value, precision, scale)`.
    Decimal128(Option<i128>, u8, i8),
    /// Decimal stored in an `i256`: `(value, precision, scale)`.
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8-bit integer.
    Int8(Option<i8>),
    /// Signed 16-bit integer.
    Int16(Option<i16>),
    /// Signed 32-bit integer.
    Int32(Option<i32>),
    /// Signed 64-bit integer.
    Int64(Option<i64>),
    /// Unsigned 8-bit integer.
    UInt8(Option<u8>),
    /// Unsigned 16-bit integer.
    UInt16(Option<u16>),
    /// Unsigned 32-bit integer.
    UInt32(Option<u32>),
    /// Unsigned 64-bit integer.
    UInt64(Option<u64>),
    /// String value for `DataType::Utf8`.
    Utf8(Option<String>),
    /// String value for `DataType::Utf8View`.
    Utf8View(Option<String>),
    /// String value for `DataType::LargeUtf8`.
    LargeUtf8(Option<String>),
    /// Byte string for `DataType::Binary`.
    Binary(Option<Vec<u8>>),
    /// Byte string for `DataType::BinaryView`.
    BinaryView(Option<Vec<u8>>),
    /// Byte string for `DataType::FixedSizeBinary`: `(declared length, bytes)`.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Byte string for `DataType::LargeBinary`.
    LargeBinary(Option<Vec<u8>>),
    /// Fixed-size list held as an array; helpers in this file
    /// (`first_array_for_list`, `try_new_null`) expect / produce exactly one row.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// List held as an array; helpers in this file (`first_array_for_list`,
    /// `try_new_null`) expect / produce exactly one row.
    List(Arc<ListArray>),
    /// Large list held as an array; helpers in this file
    /// (`first_array_for_list`, `try_new_null`) expect / produce exactly one row.
    LargeList(Arc<LargeListArray>),
    /// Struct held as an array (`try_new_null` produces a one-row array).
    Struct(Arc<StructArray>),
    /// Map held as an array (`try_new_null` produces a one-row array).
    Map(Arc<MapArray>),
    /// Value for `DataType::Date32`.
    Date32(Option<i32>),
    /// Value for `DataType::Date64`.
    Date64(Option<i64>),
    /// Value for `DataType::Time32(TimeUnit::Second)`.
    Time32Second(Option<i32>),
    /// Value for `DataType::Time32(TimeUnit::Millisecond)`.
    Time32Millisecond(Option<i32>),
    /// Value for `DataType::Time64(TimeUnit::Microsecond)`.
    Time64Microsecond(Option<i64>),
    /// Value for `DataType::Time64(TimeUnit::Nanosecond)`.
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds: `(value, optional time zone)`.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds: `(value, optional time zone)`.
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds: `(value, optional time zone)`.
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds: `(value, optional time zone)`.
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Value for `DataType::Interval(IntervalUnit::YearMonth)`.
    IntervalYearMonth(Option<i32>),
    /// Value for `DataType::Interval(IntervalUnit::DayTime)`.
    IntervalDayTime(Option<IntervalDayTime>),
    /// Value for `DataType::Interval(IntervalUnit::MonthDayNano)`.
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Value for `DataType::Duration(TimeUnit::Second)`.
    DurationSecond(Option<i64>),
    /// Value for `DataType::Duration(TimeUnit::Millisecond)`.
    DurationMillisecond(Option<i64>),
    /// Value for `DataType::Duration(TimeUnit::Microsecond)`.
    DurationMicrosecond(Option<i64>),
    /// Value for `DataType::Duration(TimeUnit::Nanosecond)`.
    DurationNanosecond(Option<i64>),
    /// Union value: an optional `(i8, value)` pair plus the union's fields and
    /// mode. NOTE(review): the `i8` is presumably the union type id — confirm.
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary-encoded value: `(index/key data type, wrapped value)`.
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
335
336impl Hash for Fl<f16> {
337 fn hash<H: Hasher>(&self, state: &mut H) {
338 self.0.to_bits().hash(state);
339 }
340}
341
342impl PartialEq for ScalarValue {
344 fn eq(&self, other: &Self) -> bool {
345 use ScalarValue::*;
346 match (self, other) {
350 (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
351 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
352 }
353 (Decimal32(_, _, _), _) => false,
354 (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
355 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
356 }
357 (Decimal64(_, _, _), _) => false,
358 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
359 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
360 }
361 (Decimal128(_, _, _), _) => false,
362 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
363 v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
364 }
365 (Decimal256(_, _, _), _) => false,
366 (Boolean(v1), Boolean(v2)) => v1.eq(v2),
367 (Boolean(_), _) => false,
368 (Float32(v1), Float32(v2)) => match (v1, v2) {
369 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
370 _ => v1.eq(v2),
371 },
372 (Float16(v1), Float16(v2)) => match (v1, v2) {
373 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
374 _ => v1.eq(v2),
375 },
376 (Float32(_), _) => false,
377 (Float16(_), _) => false,
378 (Float64(v1), Float64(v2)) => match (v1, v2) {
379 (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
380 _ => v1.eq(v2),
381 },
382 (Float64(_), _) => false,
383 (Int8(v1), Int8(v2)) => v1.eq(v2),
384 (Int8(_), _) => false,
385 (Int16(v1), Int16(v2)) => v1.eq(v2),
386 (Int16(_), _) => false,
387 (Int32(v1), Int32(v2)) => v1.eq(v2),
388 (Int32(_), _) => false,
389 (Int64(v1), Int64(v2)) => v1.eq(v2),
390 (Int64(_), _) => false,
391 (UInt8(v1), UInt8(v2)) => v1.eq(v2),
392 (UInt8(_), _) => false,
393 (UInt16(v1), UInt16(v2)) => v1.eq(v2),
394 (UInt16(_), _) => false,
395 (UInt32(v1), UInt32(v2)) => v1.eq(v2),
396 (UInt32(_), _) => false,
397 (UInt64(v1), UInt64(v2)) => v1.eq(v2),
398 (UInt64(_), _) => false,
399 (Utf8(v1), Utf8(v2)) => v1.eq(v2),
400 (Utf8(_), _) => false,
401 (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
402 (Utf8View(_), _) => false,
403 (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
404 (LargeUtf8(_), _) => false,
405 (Binary(v1), Binary(v2)) => v1.eq(v2),
406 (Binary(_), _) => false,
407 (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
408 (BinaryView(_), _) => false,
409 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
410 (FixedSizeBinary(_, _), _) => false,
411 (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
412 (LargeBinary(_), _) => false,
413 (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
414 (FixedSizeList(_), _) => false,
415 (List(v1), List(v2)) => v1.eq(v2),
416 (List(_), _) => false,
417 (LargeList(v1), LargeList(v2)) => v1.eq(v2),
418 (LargeList(_), _) => false,
419 (Struct(v1), Struct(v2)) => v1.eq(v2),
420 (Struct(_), _) => false,
421 (Map(v1), Map(v2)) => v1.eq(v2),
422 (Map(_), _) => false,
423 (Date32(v1), Date32(v2)) => v1.eq(v2),
424 (Date32(_), _) => false,
425 (Date64(v1), Date64(v2)) => v1.eq(v2),
426 (Date64(_), _) => false,
427 (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
428 (Time32Second(_), _) => false,
429 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
430 (Time32Millisecond(_), _) => false,
431 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
432 (Time64Microsecond(_), _) => false,
433 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
434 (Time64Nanosecond(_), _) => false,
435 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
436 (TimestampSecond(_, _), _) => false,
437 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
438 (TimestampMillisecond(_, _), _) => false,
439 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
440 (TimestampMicrosecond(_, _), _) => false,
441 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
442 (TimestampNanosecond(_, _), _) => false,
443 (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
444 (DurationSecond(_), _) => false,
445 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
446 (DurationMillisecond(_), _) => false,
447 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
448 (DurationMicrosecond(_), _) => false,
449 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
450 (DurationNanosecond(_), _) => false,
451 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
452 (IntervalYearMonth(_), _) => false,
453 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
454 (IntervalDayTime(_), _) => false,
455 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
456 (IntervalMonthDayNano(_), _) => false,
457 (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
458 val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
459 }
460 (Union(_, _, _), _) => false,
461 (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
462 (Dictionary(_, _), _) => false,
463 (Null, Null) => true,
464 (Null, _) => false,
465 }
466 }
467}
468
469impl PartialOrd for ScalarValue {
471 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
472 use ScalarValue::*;
473 match (self, other) {
477 (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
478 if p1.eq(p2) && s1.eq(s2) {
479 v1.partial_cmp(v2)
480 } else {
481 None
483 }
484 }
485 (Decimal32(_, _, _), _) => None,
486 (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
487 if p1.eq(p2) && s1.eq(s2) {
488 v1.partial_cmp(v2)
489 } else {
490 None
492 }
493 }
494 (Decimal64(_, _, _), _) => None,
495 (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
496 if p1.eq(p2) && s1.eq(s2) {
497 v1.partial_cmp(v2)
498 } else {
499 None
501 }
502 }
503 (Decimal128(_, _, _), _) => None,
504 (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
505 if p1.eq(p2) && s1.eq(s2) {
506 v1.partial_cmp(v2)
507 } else {
508 None
510 }
511 }
512 (Decimal256(_, _, _), _) => None,
513 (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
514 (Boolean(_), _) => None,
515 (Float32(v1), Float32(v2)) => match (v1, v2) {
516 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
517 _ => v1.partial_cmp(v2),
518 },
519 (Float16(v1), Float16(v2)) => match (v1, v2) {
520 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
521 _ => v1.partial_cmp(v2),
522 },
523 (Float32(_), _) => None,
524 (Float16(_), _) => None,
525 (Float64(v1), Float64(v2)) => match (v1, v2) {
526 (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
527 _ => v1.partial_cmp(v2),
528 },
529 (Float64(_), _) => None,
530 (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
531 (Int8(_), _) => None,
532 (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
533 (Int16(_), _) => None,
534 (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
535 (Int32(_), _) => None,
536 (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
537 (Int64(_), _) => None,
538 (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
539 (UInt8(_), _) => None,
540 (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
541 (UInt16(_), _) => None,
542 (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
543 (UInt32(_), _) => None,
544 (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
545 (UInt64(_), _) => None,
546 (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
547 (Utf8(_), _) => None,
548 (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
549 (LargeUtf8(_), _) => None,
550 (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
551 (Utf8View(_), _) => None,
552 (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
553 (Binary(_), _) => None,
554 (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
555 (BinaryView(_), _) => None,
556 (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
557 (FixedSizeBinary(_, _), _) => None,
558 (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
559 (LargeBinary(_), _) => None,
560 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
562 (FixedSizeList(arr1), FixedSizeList(arr2)) => {
563 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
564 }
565 (LargeList(arr1), LargeList(arr2)) => {
566 partial_cmp_list(arr1.as_ref(), arr2.as_ref())
567 }
568 (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
569 (Struct(struct_arr1), Struct(struct_arr2)) => {
570 partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
571 }
572 (Struct(_), _) => None,
573 (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
574 (Map(_), _) => None,
575 (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
576 (Date32(_), _) => None,
577 (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
578 (Date64(_), _) => None,
579 (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
580 (Time32Second(_), _) => None,
581 (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
582 (Time32Millisecond(_), _) => None,
583 (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
584 (Time64Microsecond(_), _) => None,
585 (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
586 (Time64Nanosecond(_), _) => None,
587 (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
588 (TimestampSecond(_, _), _) => None,
589 (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
590 v1.partial_cmp(v2)
591 }
592 (TimestampMillisecond(_, _), _) => None,
593 (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
594 v1.partial_cmp(v2)
595 }
596 (TimestampMicrosecond(_, _), _) => None,
597 (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
598 v1.partial_cmp(v2)
599 }
600 (TimestampNanosecond(_, _), _) => None,
601 (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
602 (IntervalYearMonth(_), _) => None,
603 (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
604 (IntervalDayTime(_), _) => None,
605 (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
606 (IntervalMonthDayNano(_), _) => None,
607 (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
608 (DurationSecond(_), _) => None,
609 (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
610 (DurationMillisecond(_), _) => None,
611 (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
612 (DurationMicrosecond(_), _) => None,
613 (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
614 (DurationNanosecond(_), _) => None,
615 (Union(v1, t1, m1), Union(v2, t2, m2)) => {
616 if t1.eq(t2) && m1.eq(m2) {
617 v1.partial_cmp(v2)
618 } else {
619 None
620 }
621 }
622 (Union(_, _, _), _) => None,
623 (Dictionary(k1, v1), Dictionary(k2, v2)) => {
624 if k1 == k2 {
626 v1.partial_cmp(v2)
627 } else {
628 None
629 }
630 }
631 (Dictionary(_, _), _) => None,
632 (Null, Null) => Some(Ordering::Equal),
633 (Null, _) => None,
634 }
635 }
636}
637
638fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
641 assert_eq!(arr.len(), 1);
642 if let Some(arr) = arr.as_list_opt::<i32>() {
643 arr.value(0)
644 } else if let Some(arr) = arr.as_list_opt::<i64>() {
645 arr.value(0)
646 } else if let Some(arr) = arr.as_fixed_size_list_opt() {
647 arr.value(0)
648 } else {
649 unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
650 }
651}
652
653fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
655 if arr1.data_type() != arr2.data_type() {
656 return None;
657 }
658 let arr1 = first_array_for_list(arr1);
659 let arr2 = first_array_for_list(arr2);
660
661 let min_length = arr1.len().min(arr2.len());
662 let arr1_trimmed = arr1.slice(0, min_length);
663 let arr2_trimmed = arr2.slice(0, min_length);
664
665 let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
666 let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
667
668 for j in 0..lt_res.len() {
669 if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
677 return Some(Ordering::Greater);
678 }
679 if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
680 return Some(Ordering::Less);
681 }
682
683 if lt_res.is_valid(j) && lt_res.value(j) {
684 return Some(Ordering::Less);
685 }
686 if eq_res.is_valid(j) && !eq_res.value(j) {
687 return Some(Ordering::Greater);
688 }
689 }
690
691 Some(arr1.len().cmp(&arr2.len()))
692}
693
694fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
695 for i in 0..array.num_columns() {
696 let column = array.column(i);
697 if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
698 flatten(nested_struct, columns);
700 } else {
701 columns.push(column);
703 }
704 }
705}
706
707pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
708 if s1.len() != s2.len() {
709 return None;
710 }
711
712 if s1.data_type() != s2.data_type() {
713 return None;
714 }
715
716 let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
717 let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
718
719 flatten(s1, &mut expanded_columns1);
720 flatten(s2, &mut expanded_columns2);
721
722 for col_index in 0..expanded_columns1.len() {
723 let arr1 = expanded_columns1[col_index];
724 let arr2 = expanded_columns2[col_index];
725
726 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
727 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
728
729 for j in 0..lt_res.len() {
730 if lt_res.is_valid(j) && lt_res.value(j) {
731 return Some(Ordering::Less);
732 }
733 if eq_res.is_valid(j) && !eq_res.value(j) {
734 return Some(Ordering::Greater);
735 }
736 }
737 }
738 Some(Ordering::Equal)
739}
740
741fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
742 if m1.len() != m2.len() {
743 return None;
744 }
745
746 if m1.data_type() != m2.data_type() {
747 return None;
748 }
749
750 for col_index in 0..m1.len() {
751 let arr1 = m1.entries().column(col_index);
752 let arr2 = m2.entries().column(col_index);
753
754 let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
755 let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
756
757 for j in 0..lt_res.len() {
758 if lt_res.is_valid(j) && lt_res.value(j) {
759 return Some(Ordering::Less);
760 }
761 if eq_res.is_valid(j) && !eq_res.value(j) {
762 return Some(Ordering::Greater);
763 }
764 }
765 }
766 Some(Ordering::Equal)
767}
768
// Marker impl: the hand-written `PartialEq` above is treated as a full
// equivalence relation. Float variants are compared by bit pattern there, so
// a NaN value is equal to itself and reflexivity holds.
impl Eq for ScalarValue {}
770
/// Thin wrapper that gives floating point values a `Hash` implementation
/// based on their raw bytes.
struct Fl<T>(T);

// Implements `Hash` for `Fl<float>` by reinterpreting the float's native-endian
// bytes as the same-width unsigned integer and feeding those bytes to the
// hasher. Each argument is a `(float type, same-width unsigned type)` pair.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(
            impl std::hash::Hash for Fl<$float> {
                #[inline]
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                    let bits = <$bits>::from_ne_bytes(self.0.to_ne_bytes());
                    state.write(&bits.to_ne_bytes())
                }
            }
        )+
    };
}

hash_float_value!((f64, u64), (f32, u32));
786
787impl Hash for ScalarValue {
793 fn hash<H: Hasher>(&self, state: &mut H) {
794 use ScalarValue::*;
795 match self {
796 Decimal32(v, p, s) => {
797 v.hash(state);
798 p.hash(state);
799 s.hash(state)
800 }
801 Decimal64(v, p, s) => {
802 v.hash(state);
803 p.hash(state);
804 s.hash(state)
805 }
806 Decimal128(v, p, s) => {
807 v.hash(state);
808 p.hash(state);
809 s.hash(state)
810 }
811 Decimal256(v, p, s) => {
812 v.hash(state);
813 p.hash(state);
814 s.hash(state)
815 }
816 Boolean(v) => v.hash(state),
817 Float16(v) => v.map(Fl).hash(state),
818 Float32(v) => v.map(Fl).hash(state),
819 Float64(v) => v.map(Fl).hash(state),
820 Int8(v) => v.hash(state),
821 Int16(v) => v.hash(state),
822 Int32(v) => v.hash(state),
823 Int64(v) => v.hash(state),
824 UInt8(v) => v.hash(state),
825 UInt16(v) => v.hash(state),
826 UInt32(v) => v.hash(state),
827 UInt64(v) => v.hash(state),
828 Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
829 Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
830 v.hash(state)
831 }
832 List(arr) => {
833 hash_nested_array(arr.to_owned() as ArrayRef, state);
834 }
835 LargeList(arr) => {
836 hash_nested_array(arr.to_owned() as ArrayRef, state);
837 }
838 FixedSizeList(arr) => {
839 hash_nested_array(arr.to_owned() as ArrayRef, state);
840 }
841 Struct(arr) => {
842 hash_nested_array(arr.to_owned() as ArrayRef, state);
843 }
844 Map(arr) => {
845 hash_nested_array(arr.to_owned() as ArrayRef, state);
846 }
847 Date32(v) => v.hash(state),
848 Date64(v) => v.hash(state),
849 Time32Second(v) => v.hash(state),
850 Time32Millisecond(v) => v.hash(state),
851 Time64Microsecond(v) => v.hash(state),
852 Time64Nanosecond(v) => v.hash(state),
853 TimestampSecond(v, _) => v.hash(state),
854 TimestampMillisecond(v, _) => v.hash(state),
855 TimestampMicrosecond(v, _) => v.hash(state),
856 TimestampNanosecond(v, _) => v.hash(state),
857 DurationSecond(v) => v.hash(state),
858 DurationMillisecond(v) => v.hash(state),
859 DurationMicrosecond(v) => v.hash(state),
860 DurationNanosecond(v) => v.hash(state),
861 IntervalYearMonth(v) => v.hash(state),
862 IntervalDayTime(v) => v.hash(state),
863 IntervalMonthDayNano(v) => v.hash(state),
864 Union(v, t, m) => {
865 v.hash(state);
866 t.hash(state);
867 m.hash(state);
868 }
869 Dictionary(k, v) => {
870 k.hash(state);
871 v.hash(state);
872 }
873 Null => 1.hash(state),
875 }
876 }
877}
878
879fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
880 let len = arr.len();
881 let arrays = vec![arr];
882 let hashes_buffer = &mut vec![0; len];
883 let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
884 let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
885 hashes.hash(state);
887}
888
889#[inline]
896pub fn get_dict_value<K: ArrowDictionaryKeyType>(
897 array: &dyn Array,
898 index: usize,
899) -> Result<(&ArrayRef, Option<usize>)> {
900 let dict_array = as_dictionary_array::<K>(array)?;
901 Ok((dict_array.values(), dict_array.key(index)))
902}
903
904fn dict_from_scalar<K: ArrowDictionaryKeyType>(
907 value: &ScalarValue,
908 size: usize,
909) -> Result<ArrayRef> {
910 let values_array = value.to_array_of_size(1)?;
912
913 let key_array: PrimitiveArray<K> =
916 get_or_create_cached_key_array::<K>(size, value.is_null());
917
918 Ok(Arc::new(
924 DictionaryArray::<K>::try_new(key_array, values_array)?, ))
926}
927
928pub fn dict_from_values<K: ArrowDictionaryKeyType>(
943 values_array: ArrayRef,
944) -> Result<ArrayRef> {
945 let key_array: PrimitiveArray<K> = (0..values_array.len())
948 .map(|index| {
949 if values_array.is_valid(index) {
950 let native_index = K::Native::from_usize(index).ok_or_else(|| {
951 _internal_datafusion_err!(
952 "Can not create index of type {} from value {index}",
953 K::DATA_TYPE
954 )
955 })?;
956 Ok(Some(native_index))
957 } else {
958 Ok(None)
959 }
960 })
961 .collect::<Result<Vec<_>>>()?
962 .into_iter()
963 .collect();
964
965 let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
971 Ok(Arc::new(dict_array))
972}
973
// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
// two-field variant `ScalarValue::$SCALAR`: the first field is `None` for a
// null element and `Some(value.into())` otherwise, the second is a clone of
// `$TZ`. Expands to a `Result<ScalarValue, DataFusionError>` and uses `?` on
// the downcast.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
            $TZ.clone(),
        ))
    }};
}
986
// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
// single-field variant `ScalarValue::$SCALAR`: `None` for a null element,
// `Some(value.into())` otherwise. Expands to a
// `Result<ScalarValue, DataFusionError>` and uses `?` on the downcast.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(
            if array.is_null($index) {
                None
            } else {
                Some(array.value($index).into())
            },
        ))
    }};
}
998
// Builds an array of `$SIZE` rows from an optional value: `Some(v)` repeats
// `*v` via `$ARRAY_TYPE::from_value`, `None` yields an all-null array of the
// given data type. The second form takes an extra `$ENUM` argument for data
// types that carry a parameter (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
            Some(value) => Arc::new($ARRAY_TYPE::from_value(*value, $SIZE)),
        }
    }};
}
1013
// Builds a timestamp array of `$SIZE` rows from an optional value: `Some(v)`
// repeats `*v` and attaches the time zone `$TZ`, `None` yields an all-null
// array of `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
            Some(value) => {
                Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
            }
        }
    };
}
1024
// Tests whether element `$index` of `$array` (downcast with `$array_cast`)
// equals the optional value `$VALUE`: `None` matches only a null element,
// `Some(v)` matches only a valid element equal to `v`. Expands to a
// `Result<bool, DataFusionError>` and uses `?` on the downcast.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        Ok::<bool, DataFusionError>(match $VALUE {
            None => !is_valid,
            Some(val) => is_valid && &array.value($index) == val,
        })
    }};
}
1035
1036impl ScalarValue {
1037 pub fn new_primitive<T: ArrowPrimitiveType>(
1043 a: Option<T::Native>,
1044 d: &DataType,
1045 ) -> Result<Self> {
1046 match a {
1047 None => d.try_into(),
1048 Some(v) => {
1049 let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1050 .with_data_type(d.clone());
1051 Self::try_from_array(&array, 0)
1052 }
1053 }
1054 }
1055
1056 pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1058 if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1060 return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1061 }
1062 _internal_err!(
1063 "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1064 )
1065 }
1066
1067 pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1079 Ok(match data_type {
1080 DataType::Boolean => ScalarValue::Boolean(None),
1081 DataType::Float16 => ScalarValue::Float16(None),
1082 DataType::Float64 => ScalarValue::Float64(None),
1083 DataType::Float32 => ScalarValue::Float32(None),
1084 DataType::Int8 => ScalarValue::Int8(None),
1085 DataType::Int16 => ScalarValue::Int16(None),
1086 DataType::Int32 => ScalarValue::Int32(None),
1087 DataType::Int64 => ScalarValue::Int64(None),
1088 DataType::UInt8 => ScalarValue::UInt8(None),
1089 DataType::UInt16 => ScalarValue::UInt16(None),
1090 DataType::UInt32 => ScalarValue::UInt32(None),
1091 DataType::UInt64 => ScalarValue::UInt64(None),
1092 DataType::Decimal32(precision, scale) => {
1093 ScalarValue::Decimal32(None, *precision, *scale)
1094 }
1095 DataType::Decimal64(precision, scale) => {
1096 ScalarValue::Decimal64(None, *precision, *scale)
1097 }
1098 DataType::Decimal128(precision, scale) => {
1099 ScalarValue::Decimal128(None, *precision, *scale)
1100 }
1101 DataType::Decimal256(precision, scale) => {
1102 ScalarValue::Decimal256(None, *precision, *scale)
1103 }
1104 DataType::Utf8 => ScalarValue::Utf8(None),
1105 DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1106 DataType::Utf8View => ScalarValue::Utf8View(None),
1107 DataType::Binary => ScalarValue::Binary(None),
1108 DataType::BinaryView => ScalarValue::BinaryView(None),
1109 DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1110 DataType::LargeBinary => ScalarValue::LargeBinary(None),
1111 DataType::Date32 => ScalarValue::Date32(None),
1112 DataType::Date64 => ScalarValue::Date64(None),
1113 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1114 DataType::Time32(TimeUnit::Millisecond) => {
1115 ScalarValue::Time32Millisecond(None)
1116 }
1117 DataType::Time64(TimeUnit::Microsecond) => {
1118 ScalarValue::Time64Microsecond(None)
1119 }
1120 DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1121 DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1122 ScalarValue::TimestampSecond(None, tz_opt.clone())
1123 }
1124 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1125 ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1126 }
1127 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1128 ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1129 }
1130 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1131 ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1132 }
1133 DataType::Interval(IntervalUnit::YearMonth) => {
1134 ScalarValue::IntervalYearMonth(None)
1135 }
1136 DataType::Interval(IntervalUnit::DayTime) => {
1137 ScalarValue::IntervalDayTime(None)
1138 }
1139 DataType::Interval(IntervalUnit::MonthDayNano) => {
1140 ScalarValue::IntervalMonthDayNano(None)
1141 }
1142 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1143 DataType::Duration(TimeUnit::Millisecond) => {
1144 ScalarValue::DurationMillisecond(None)
1145 }
1146 DataType::Duration(TimeUnit::Microsecond) => {
1147 ScalarValue::DurationMicrosecond(None)
1148 }
1149 DataType::Duration(TimeUnit::Nanosecond) => {
1150 ScalarValue::DurationNanosecond(None)
1151 }
1152 DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1153 index_type.clone(),
1154 Box::new(value_type.as_ref().try_into()?),
1155 ),
1156 DataType::List(field_ref) => ScalarValue::List(Arc::new(
1158 GenericListArray::new_null(Arc::clone(field_ref), 1),
1159 )),
1160 DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1162 GenericListArray::new_null(Arc::clone(field_ref), 1),
1163 )),
1164 DataType::FixedSizeList(field_ref, fixed_length) => {
1166 ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1167 Arc::clone(field_ref),
1168 *fixed_length,
1169 1,
1170 )))
1171 }
1172 DataType::Struct(fields) => ScalarValue::Struct(
1173 new_null_array(&DataType::Struct(fields.to_owned()), 1)
1174 .as_struct()
1175 .to_owned()
1176 .into(),
1177 ),
1178 DataType::Map(fields, sorted) => ScalarValue::Map(
1179 new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1180 .as_map()
1181 .to_owned()
1182 .into(),
1183 ),
1184 DataType::Union(fields, mode) => {
1185 ScalarValue::Union(None, fields.clone(), *mode)
1186 }
1187 DataType::Null => ScalarValue::Null,
1188 _ => {
1189 return _not_impl_err!(
1190 "Can't create a null scalar from data_type \"{data_type}\""
1191 );
1192 }
1193 })
1194 }
1195
1196 pub fn new_utf8(val: impl Into<String>) -> Self {
1198 ScalarValue::from(val.into())
1199 }
1200
1201 pub fn new_utf8view(val: impl Into<String>) -> Self {
1203 ScalarValue::Utf8View(Some(val.into()))
1204 }
1205
1206 pub fn new_interval_ym(years: i32, months: i32) -> Self {
1209 let val = IntervalYearMonthType::make_value(years, months);
1210 ScalarValue::IntervalYearMonth(Some(val))
1211 }
1212
1213 pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1216 let val = IntervalDayTimeType::make_value(days, millis);
1217 Self::IntervalDayTime(Some(val))
1218 }
1219
1220 pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1223 let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1224 ScalarValue::IntervalMonthDayNano(Some(val))
1225 }
1226
1227 pub fn new_timestamp<T: ArrowTimestampType>(
1230 value: Option<i64>,
1231 tz_opt: Option<Arc<str>>,
1232 ) -> Self {
1233 match T::UNIT {
1234 TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1235 TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1236 TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1237 TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1238 }
1239 }
1240
1241 pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1243 match datatype {
1244 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1245 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1246 _ => _internal_err!("PI is not supported for data type: {}", datatype),
1247 }
1248 }
1249
1250 pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1252 match datatype {
1253 DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1254 DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1255 _ => {
1256 _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1257 }
1258 }
1259 }
1260
1261 pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1263 match datatype {
1264 DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1265 DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1266 _ => {
1267 _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1268 }
1269 }
1270 }
1271
1272 pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1274 match datatype {
1275 DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1276 DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1277 _ => {
1278 _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1279 }
1280 }
1281 }
1282
1283 pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1285 match datatype {
1286 DataType::Float32 => {
1287 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1288 }
1289 DataType::Float64 => {
1290 Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1291 }
1292 _ => {
1293 _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1294 }
1295 }
1296 }
1297
1298 pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1300 match datatype {
1301 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1302 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1303 _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1304 }
1305 }
1306
1307 pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1309 match datatype {
1310 DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1311 DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1312 _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1313 }
1314 }
1315
1316 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1318 match datatype {
1319 DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1320 DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1321 _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1322 }
1323 }
1324
1325 pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1327 match datatype {
1328 DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1329 DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1330 _ => {
1331 _internal_err!("Infinity is not supported for data type: {}", datatype)
1332 }
1333 }
1334 }
1335
1336 pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1338 match datatype {
1339 DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1340 DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1341 _ => {
1342 _internal_err!(
1343 "Negative Infinity is not supported for data type: {}",
1344 datatype
1345 )
1346 }
1347 }
1348 }
1349
1350 pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1352 Ok(match datatype {
1353 DataType::Boolean => ScalarValue::Boolean(Some(false)),
1354 DataType::Int8 => ScalarValue::Int8(Some(0)),
1355 DataType::Int16 => ScalarValue::Int16(Some(0)),
1356 DataType::Int32 => ScalarValue::Int32(Some(0)),
1357 DataType::Int64 => ScalarValue::Int64(Some(0)),
1358 DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1359 DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1360 DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1361 DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1362 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1363 DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1364 DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1365 DataType::Decimal32(precision, scale) => {
1366 ScalarValue::Decimal32(Some(0), *precision, *scale)
1367 }
1368 DataType::Decimal64(precision, scale) => {
1369 ScalarValue::Decimal64(Some(0), *precision, *scale)
1370 }
1371 DataType::Decimal128(precision, scale) => {
1372 ScalarValue::Decimal128(Some(0), *precision, *scale)
1373 }
1374 DataType::Decimal256(precision, scale) => {
1375 ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1376 }
1377 DataType::Timestamp(TimeUnit::Second, tz) => {
1378 ScalarValue::TimestampSecond(Some(0), tz.clone())
1379 }
1380 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1381 ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1382 }
1383 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1384 ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1385 }
1386 DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1387 ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1388 }
1389 DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1390 DataType::Time32(TimeUnit::Millisecond) => {
1391 ScalarValue::Time32Millisecond(Some(0))
1392 }
1393 DataType::Time64(TimeUnit::Microsecond) => {
1394 ScalarValue::Time64Microsecond(Some(0))
1395 }
1396 DataType::Time64(TimeUnit::Nanosecond) => {
1397 ScalarValue::Time64Nanosecond(Some(0))
1398 }
1399 DataType::Interval(IntervalUnit::YearMonth) => {
1400 ScalarValue::IntervalYearMonth(Some(0))
1401 }
1402 DataType::Interval(IntervalUnit::DayTime) => {
1403 ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1404 }
1405 DataType::Interval(IntervalUnit::MonthDayNano) => {
1406 ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1407 }
1408 DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1409 DataType::Duration(TimeUnit::Millisecond) => {
1410 ScalarValue::DurationMillisecond(Some(0))
1411 }
1412 DataType::Duration(TimeUnit::Microsecond) => {
1413 ScalarValue::DurationMicrosecond(Some(0))
1414 }
1415 DataType::Duration(TimeUnit::Nanosecond) => {
1416 ScalarValue::DurationNanosecond(Some(0))
1417 }
1418 DataType::Date32 => ScalarValue::Date32(Some(0)),
1419 DataType::Date64 => ScalarValue::Date64(Some(0)),
1420 _ => {
1421 return _not_impl_err!(
1422 "Can't create a zero scalar from data_type \"{datatype}\""
1423 );
1424 }
1425 })
1426 }
1427
1428 pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1453 match datatype {
1454 DataType::Null => Ok(ScalarValue::Null),
1456
1457 DataType::Boolean
1459 | DataType::Int8
1460 | DataType::Int16
1461 | DataType::Int32
1462 | DataType::Int64
1463 | DataType::UInt8
1464 | DataType::UInt16
1465 | DataType::UInt32
1466 | DataType::UInt64
1467 | DataType::Float16
1468 | DataType::Float32
1469 | DataType::Float64
1470 | DataType::Decimal128(_, _)
1471 | DataType::Decimal256(_, _)
1472 | DataType::Timestamp(_, _)
1473 | DataType::Time32(_)
1474 | DataType::Time64(_)
1475 | DataType::Interval(_)
1476 | DataType::Duration(_)
1477 | DataType::Date32
1478 | DataType::Date64 => ScalarValue::new_zero(datatype),
1479
1480 DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1482 DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1483 DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1484
1485 DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1487 DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1488 DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1489
1490 DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1492 *size,
1493 Some(vec![0; *size as usize]),
1494 )),
1495
1496 DataType::List(field) => {
1498 let list =
1499 ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1500 Ok(ScalarValue::List(list))
1501 }
1502 DataType::FixedSizeList(field, _size) => {
1503 let empty_arr = new_empty_array(field.data_type());
1504 let values = Arc::new(
1505 SingleRowListArrayBuilder::new(empty_arr)
1506 .with_nullable(field.is_nullable())
1507 .build_fixed_size_list_array(0),
1508 );
1509 Ok(ScalarValue::FixedSizeList(values))
1510 }
1511 DataType::LargeList(field) => {
1512 let list = ScalarValue::new_large_list(&[], field.data_type());
1513 Ok(ScalarValue::LargeList(list))
1514 }
1515
1516 DataType::Struct(fields) => {
1518 let values = fields
1519 .iter()
1520 .map(|f| ScalarValue::new_default(f.data_type()))
1521 .collect::<Result<Vec<_>>>()?;
1522 Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1523 fields.clone(),
1524 values
1525 .into_iter()
1526 .map(|v| v.to_array())
1527 .collect::<Result<_>>()?,
1528 None,
1529 ))))
1530 }
1531
1532 DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1534 key_type.clone(),
1535 Box::new(ScalarValue::new_default(value_type)?),
1536 )),
1537
1538 DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1540 ArrayData::new_empty(field.data_type()),
1541 )))),
1542
1543 DataType::Union(fields, mode) => {
1545 if let Some((type_id, field)) = fields.iter().next() {
1546 let default_value = ScalarValue::new_default(field.data_type())?;
1547 Ok(ScalarValue::Union(
1548 Some((type_id, Box::new(default_value))),
1549 fields.clone(),
1550 *mode,
1551 ))
1552 } else {
1553 _internal_err!("Union type must have at least one field")
1554 }
1555 }
1556
1557 _ => {
1559 _not_impl_err!(
1560 "Default value for data_type \"{datatype}\" is not implemented yet"
1561 )
1562 }
1563 }
1564 }
1565
1566 pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1568 Ok(match datatype {
1569 DataType::Int8 => ScalarValue::Int8(Some(1)),
1570 DataType::Int16 => ScalarValue::Int16(Some(1)),
1571 DataType::Int32 => ScalarValue::Int32(Some(1)),
1572 DataType::Int64 => ScalarValue::Int64(Some(1)),
1573 DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1574 DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1575 DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1576 DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1577 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1578 DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1579 DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1580 DataType::Decimal32(precision, scale) => {
1581 validate_decimal_precision_and_scale::<Decimal32Type>(
1582 *precision, *scale,
1583 )?;
1584 if *scale < 0 {
1585 return _internal_err!("Negative scale is not supported");
1586 }
1587 match 10_i32.checked_pow(*scale as u32) {
1588 Some(value) => {
1589 ScalarValue::Decimal32(Some(value), *precision, *scale)
1590 }
1591 None => return _internal_err!("Unsupported scale {scale}"),
1592 }
1593 }
1594 DataType::Decimal64(precision, scale) => {
1595 validate_decimal_precision_and_scale::<Decimal64Type>(
1596 *precision, *scale,
1597 )?;
1598 if *scale < 0 {
1599 return _internal_err!("Negative scale is not supported");
1600 }
1601 match i64::from(10).checked_pow(*scale as u32) {
1602 Some(value) => {
1603 ScalarValue::Decimal64(Some(value), *precision, *scale)
1604 }
1605 None => return _internal_err!("Unsupported scale {scale}"),
1606 }
1607 }
1608 DataType::Decimal128(precision, scale) => {
1609 validate_decimal_precision_and_scale::<Decimal128Type>(
1610 *precision, *scale,
1611 )?;
1612 if *scale < 0 {
1613 return _internal_err!("Negative scale is not supported");
1614 }
1615 match i128::from(10).checked_pow(*scale as u32) {
1616 Some(value) => {
1617 ScalarValue::Decimal128(Some(value), *precision, *scale)
1618 }
1619 None => return _internal_err!("Unsupported scale {scale}"),
1620 }
1621 }
1622 DataType::Decimal256(precision, scale) => {
1623 validate_decimal_precision_and_scale::<Decimal256Type>(
1624 *precision, *scale,
1625 )?;
1626 if *scale < 0 {
1627 return _internal_err!("Negative scale is not supported");
1628 }
1629 match i256::from(10).checked_pow(*scale as u32) {
1630 Some(value) => {
1631 ScalarValue::Decimal256(Some(value), *precision, *scale)
1632 }
1633 None => return _internal_err!("Unsupported scale {scale}"),
1634 }
1635 }
1636 _ => {
1637 return _not_impl_err!(
1638 "Can't create an one scalar from data_type \"{datatype}\""
1639 );
1640 }
1641 })
1642 }
1643
1644 pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1646 Ok(match datatype {
1647 DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1648 DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1649 DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1650 DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1651 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1652 DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1653 DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1654 DataType::Decimal32(precision, scale) => {
1655 validate_decimal_precision_and_scale::<Decimal32Type>(
1656 *precision, *scale,
1657 )?;
1658 if *scale < 0 {
1659 return _internal_err!("Negative scale is not supported");
1660 }
1661 match 10_i32.checked_pow(*scale as u32) {
1662 Some(value) => {
1663 ScalarValue::Decimal32(Some(-value), *precision, *scale)
1664 }
1665 None => return _internal_err!("Unsupported scale {scale}"),
1666 }
1667 }
1668 DataType::Decimal64(precision, scale) => {
1669 validate_decimal_precision_and_scale::<Decimal64Type>(
1670 *precision, *scale,
1671 )?;
1672 if *scale < 0 {
1673 return _internal_err!("Negative scale is not supported");
1674 }
1675 match i64::from(10).checked_pow(*scale as u32) {
1676 Some(value) => {
1677 ScalarValue::Decimal64(Some(-value), *precision, *scale)
1678 }
1679 None => return _internal_err!("Unsupported scale {scale}"),
1680 }
1681 }
1682 DataType::Decimal128(precision, scale) => {
1683 validate_decimal_precision_and_scale::<Decimal128Type>(
1684 *precision, *scale,
1685 )?;
1686 if *scale < 0 {
1687 return _internal_err!("Negative scale is not supported");
1688 }
1689 match i128::from(10).checked_pow(*scale as u32) {
1690 Some(value) => {
1691 ScalarValue::Decimal128(Some(-value), *precision, *scale)
1692 }
1693 None => return _internal_err!("Unsupported scale {scale}"),
1694 }
1695 }
1696 DataType::Decimal256(precision, scale) => {
1697 validate_decimal_precision_and_scale::<Decimal256Type>(
1698 *precision, *scale,
1699 )?;
1700 if *scale < 0 {
1701 return _internal_err!("Negative scale is not supported");
1702 }
1703 match i256::from(10).checked_pow(*scale as u32) {
1704 Some(value) => {
1705 ScalarValue::Decimal256(Some(-value), *precision, *scale)
1706 }
1707 None => return _internal_err!("Unsupported scale {scale}"),
1708 }
1709 }
1710 _ => {
1711 return _not_impl_err!(
1712 "Can't create a negative one scalar from data_type \"{datatype}\""
1713 );
1714 }
1715 })
1716 }
1717
1718 pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1719 Ok(match datatype {
1720 DataType::Int8 => ScalarValue::Int8(Some(10)),
1721 DataType::Int16 => ScalarValue::Int16(Some(10)),
1722 DataType::Int32 => ScalarValue::Int32(Some(10)),
1723 DataType::Int64 => ScalarValue::Int64(Some(10)),
1724 DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1725 DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1726 DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1727 DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1728 DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1729 DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1730 DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1731 DataType::Decimal32(precision, scale) => {
1732 if let Err(err) = validate_decimal_precision_and_scale::<Decimal32Type>(
1733 *precision, *scale,
1734 ) {
1735 return _internal_err!("Invalid precision and scale {err}");
1736 }
1737 if *scale <= 0 {
1738 return _internal_err!("Negative scale is not supported");
1739 }
1740 match 10_i32.checked_pow((*scale + 1) as u32) {
1741 Some(value) => {
1742 ScalarValue::Decimal32(Some(value), *precision, *scale)
1743 }
1744 None => return _internal_err!("Unsupported scale {scale}"),
1745 }
1746 }
1747 DataType::Decimal64(precision, scale) => {
1748 if let Err(err) = validate_decimal_precision_and_scale::<Decimal64Type>(
1749 *precision, *scale,
1750 ) {
1751 return _internal_err!("Invalid precision and scale {err}");
1752 }
1753 if *scale <= 0 {
1754 return _internal_err!("Negative scale is not supported");
1755 }
1756 match i64::from(10).checked_pow((*scale + 1) as u32) {
1757 Some(value) => {
1758 ScalarValue::Decimal64(Some(value), *precision, *scale)
1759 }
1760 None => return _internal_err!("Unsupported scale {scale}"),
1761 }
1762 }
1763 DataType::Decimal128(precision, scale) => {
1764 if let Err(err) = validate_decimal_precision_and_scale::<Decimal128Type>(
1765 *precision, *scale,
1766 ) {
1767 return _internal_err!("Invalid precision and scale {err}");
1768 }
1769 if *scale < 0 {
1770 return _internal_err!("Negative scale is not supported");
1771 }
1772 match i128::from(10).checked_pow((*scale + 1) as u32) {
1773 Some(value) => {
1774 ScalarValue::Decimal128(Some(value), *precision, *scale)
1775 }
1776 None => return _internal_err!("Unsupported scale {scale}"),
1777 }
1778 }
1779 DataType::Decimal256(precision, scale) => {
1780 if let Err(err) = validate_decimal_precision_and_scale::<Decimal256Type>(
1781 *precision, *scale,
1782 ) {
1783 return _internal_err!("Invalid precision and scale {err}");
1784 }
1785 if *scale < 0 {
1786 return _internal_err!("Negative scale is not supported");
1787 }
1788 match i256::from(10).checked_pow((*scale + 1) as u32) {
1789 Some(value) => {
1790 ScalarValue::Decimal256(Some(value), *precision, *scale)
1791 }
1792 None => return _internal_err!("Unsupported scale {scale}"),
1793 }
1794 }
1795 _ => {
1796 return _not_impl_err!(
1797 "Can't create a ten scalar from data_type \"{datatype}\""
1798 );
1799 }
1800 })
1801 }
1802
1803 pub fn data_type(&self) -> DataType {
1805 match self {
1806 ScalarValue::Boolean(_) => DataType::Boolean,
1807 ScalarValue::UInt8(_) => DataType::UInt8,
1808 ScalarValue::UInt16(_) => DataType::UInt16,
1809 ScalarValue::UInt32(_) => DataType::UInt32,
1810 ScalarValue::UInt64(_) => DataType::UInt64,
1811 ScalarValue::Int8(_) => DataType::Int8,
1812 ScalarValue::Int16(_) => DataType::Int16,
1813 ScalarValue::Int32(_) => DataType::Int32,
1814 ScalarValue::Int64(_) => DataType::Int64,
1815 ScalarValue::Decimal32(_, precision, scale) => {
1816 DataType::Decimal32(*precision, *scale)
1817 }
1818 ScalarValue::Decimal64(_, precision, scale) => {
1819 DataType::Decimal64(*precision, *scale)
1820 }
1821 ScalarValue::Decimal128(_, precision, scale) => {
1822 DataType::Decimal128(*precision, *scale)
1823 }
1824 ScalarValue::Decimal256(_, precision, scale) => {
1825 DataType::Decimal256(*precision, *scale)
1826 }
1827 ScalarValue::TimestampSecond(_, tz_opt) => {
1828 DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1829 }
1830 ScalarValue::TimestampMillisecond(_, tz_opt) => {
1831 DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1832 }
1833 ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1834 DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1835 }
1836 ScalarValue::TimestampNanosecond(_, tz_opt) => {
1837 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1838 }
1839 ScalarValue::Float16(_) => DataType::Float16,
1840 ScalarValue::Float32(_) => DataType::Float32,
1841 ScalarValue::Float64(_) => DataType::Float64,
1842 ScalarValue::Utf8(_) => DataType::Utf8,
1843 ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1844 ScalarValue::Utf8View(_) => DataType::Utf8View,
1845 ScalarValue::Binary(_) => DataType::Binary,
1846 ScalarValue::BinaryView(_) => DataType::BinaryView,
1847 ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1848 ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1849 ScalarValue::List(arr) => arr.data_type().to_owned(),
1850 ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1851 ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1852 ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1853 ScalarValue::Map(arr) => arr.data_type().to_owned(),
1854 ScalarValue::Date32(_) => DataType::Date32,
1855 ScalarValue::Date64(_) => DataType::Date64,
1856 ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1857 ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1858 ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1859 ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1860 ScalarValue::IntervalYearMonth(_) => {
1861 DataType::Interval(IntervalUnit::YearMonth)
1862 }
1863 ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1864 ScalarValue::IntervalMonthDayNano(_) => {
1865 DataType::Interval(IntervalUnit::MonthDayNano)
1866 }
1867 ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1868 ScalarValue::DurationMillisecond(_) => {
1869 DataType::Duration(TimeUnit::Millisecond)
1870 }
1871 ScalarValue::DurationMicrosecond(_) => {
1872 DataType::Duration(TimeUnit::Microsecond)
1873 }
1874 ScalarValue::DurationNanosecond(_) => {
1875 DataType::Duration(TimeUnit::Nanosecond)
1876 }
1877 ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1878 ScalarValue::Dictionary(k, v) => {
1879 DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1880 }
1881 ScalarValue::Null => DataType::Null,
1882 }
1883 }
1884
1885 pub fn arithmetic_negate(&self) -> Result<Self> {
1887 fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1888 v: T,
1889 ctx: impl Fn() -> String,
1890 ) -> Result<T> {
1891 v.neg_checked()
1892 .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1893 }
1894 match self {
1895 ScalarValue::Int8(None)
1896 | ScalarValue::Int16(None)
1897 | ScalarValue::Int32(None)
1898 | ScalarValue::Int64(None)
1899 | ScalarValue::Float16(None)
1900 | ScalarValue::Float32(None)
1901 | ScalarValue::Float64(None) => Ok(self.clone()),
1902 ScalarValue::Float16(Some(v)) => {
1903 Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1904 }
1905 ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1906 ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1907 ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1908 ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1909 ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1910 ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1911 ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1912 ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1913 format!("In negation of IntervalYearMonth({v})")
1914 })?)),
1915 ),
1916 ScalarValue::IntervalDayTime(Some(v)) => {
1917 let (days, ms) = IntervalDayTimeType::to_parts(*v);
1918 let val = IntervalDayTimeType::make_value(
1919 neg_checked_with_ctx(days, || {
1920 format!("In negation of days {days} in IntervalDayTime")
1921 })?,
1922 neg_checked_with_ctx(ms, || {
1923 format!("In negation of milliseconds {ms} in IntervalDayTime")
1924 })?,
1925 );
1926 Ok(ScalarValue::IntervalDayTime(Some(val)))
1927 }
1928 ScalarValue::IntervalMonthDayNano(Some(v)) => {
1929 let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1930 let val = IntervalMonthDayNanoType::make_value(
1931 neg_checked_with_ctx(months, || {
1932 format!("In negation of months {months} of IntervalMonthDayNano")
1933 })?,
1934 neg_checked_with_ctx(days, || {
1935 format!("In negation of days {days} of IntervalMonthDayNano")
1936 })?,
1937 neg_checked_with_ctx(nanos, || {
1938 format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1939 })?,
1940 );
1941 Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1942 }
1943 ScalarValue::Decimal32(Some(v), precision, scale) => {
1944 Ok(ScalarValue::Decimal32(
1945 Some(neg_checked_with_ctx(*v, || {
1946 format!("In negation of Decimal32({v}, {precision}, {scale})")
1947 })?),
1948 *precision,
1949 *scale,
1950 ))
1951 }
1952 ScalarValue::Decimal64(Some(v), precision, scale) => {
1953 Ok(ScalarValue::Decimal64(
1954 Some(neg_checked_with_ctx(*v, || {
1955 format!("In negation of Decimal64({v}, {precision}, {scale})")
1956 })?),
1957 *precision,
1958 *scale,
1959 ))
1960 }
1961 ScalarValue::Decimal128(Some(v), precision, scale) => {
1962 Ok(ScalarValue::Decimal128(
1963 Some(neg_checked_with_ctx(*v, || {
1964 format!("In negation of Decimal128({v}, {precision}, {scale})")
1965 })?),
1966 *precision,
1967 *scale,
1968 ))
1969 }
1970 ScalarValue::Decimal256(Some(v), precision, scale) => {
1971 Ok(ScalarValue::Decimal256(
1972 Some(neg_checked_with_ctx(*v, || {
1973 format!("In negation of Decimal256({v}, {precision}, {scale})")
1974 })?),
1975 *precision,
1976 *scale,
1977 ))
1978 }
1979 ScalarValue::TimestampSecond(Some(v), tz) => {
1980 Ok(ScalarValue::TimestampSecond(
1981 Some(neg_checked_with_ctx(*v, || {
1982 format!("In negation of TimestampSecond({v})")
1983 })?),
1984 tz.clone(),
1985 ))
1986 }
1987 ScalarValue::TimestampNanosecond(Some(v), tz) => {
1988 Ok(ScalarValue::TimestampNanosecond(
1989 Some(neg_checked_with_ctx(*v, || {
1990 format!("In negation of TimestampNanoSecond({v})")
1991 })?),
1992 tz.clone(),
1993 ))
1994 }
1995 ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1996 Ok(ScalarValue::TimestampMicrosecond(
1997 Some(neg_checked_with_ctx(*v, || {
1998 format!("In negation of TimestampMicroSecond({v})")
1999 })?),
2000 tz.clone(),
2001 ))
2002 }
2003 ScalarValue::TimestampMillisecond(Some(v), tz) => {
2004 Ok(ScalarValue::TimestampMillisecond(
2005 Some(neg_checked_with_ctx(*v, || {
2006 format!("In negation of TimestampMilliSecond({v})")
2007 })?),
2008 tz.clone(),
2009 ))
2010 }
2011 value => _internal_err!(
2012 "Can not run arithmetic negative on scalar value {value:?}"
2013 ),
2014 }
2015 }
2016
2017 pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2022 let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2023 Self::try_from_array(r.as_ref(), 0)
2024 }
2025 pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2030 let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2031 Self::try_from_array(r.as_ref(), 0)
2032 }
2033
2034 pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2039 let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2040 Self::try_from_array(r.as_ref(), 0)
2041 }
2042
2043 pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2048 let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2049 Self::try_from_array(r.as_ref(), 0)
2050 }
2051
2052 pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2057 let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2058 Self::try_from_array(r.as_ref(), 0)
2059 }
2060
2061 pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2066 let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2067 Self::try_from_array(r.as_ref(), 0)
2068 }
2069
2070 pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2078 let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2079 Self::try_from_array(r.as_ref(), 0)
2080 }
2081
2082 pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2090 let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2091 Self::try_from_array(r.as_ref(), 0)
2092 }
2093
2094 pub fn is_unsigned(&self) -> bool {
2095 matches!(
2096 self,
2097 ScalarValue::UInt8(_)
2098 | ScalarValue::UInt16(_)
2099 | ScalarValue::UInt32(_)
2100 | ScalarValue::UInt64(_)
2101 )
2102 }
2103
2104 pub fn is_null(&self) -> bool {
2106 match self {
2107 ScalarValue::Boolean(v) => v.is_none(),
2108 ScalarValue::Null => true,
2109 ScalarValue::Float16(v) => v.is_none(),
2110 ScalarValue::Float32(v) => v.is_none(),
2111 ScalarValue::Float64(v) => v.is_none(),
2112 ScalarValue::Decimal32(v, _, _) => v.is_none(),
2113 ScalarValue::Decimal64(v, _, _) => v.is_none(),
2114 ScalarValue::Decimal128(v, _, _) => v.is_none(),
2115 ScalarValue::Decimal256(v, _, _) => v.is_none(),
2116 ScalarValue::Int8(v) => v.is_none(),
2117 ScalarValue::Int16(v) => v.is_none(),
2118 ScalarValue::Int32(v) => v.is_none(),
2119 ScalarValue::Int64(v) => v.is_none(),
2120 ScalarValue::UInt8(v) => v.is_none(),
2121 ScalarValue::UInt16(v) => v.is_none(),
2122 ScalarValue::UInt32(v) => v.is_none(),
2123 ScalarValue::UInt64(v) => v.is_none(),
2124 ScalarValue::Utf8(v)
2125 | ScalarValue::Utf8View(v)
2126 | ScalarValue::LargeUtf8(v) => v.is_none(),
2127 ScalarValue::Binary(v)
2128 | ScalarValue::BinaryView(v)
2129 | ScalarValue::FixedSizeBinary(_, v)
2130 | ScalarValue::LargeBinary(v) => v.is_none(),
2131 ScalarValue::List(arr) => arr.len() == arr.null_count(),
2134 ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2135 ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2136 ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2137 ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2138 ScalarValue::Date32(v) => v.is_none(),
2139 ScalarValue::Date64(v) => v.is_none(),
2140 ScalarValue::Time32Second(v) => v.is_none(),
2141 ScalarValue::Time32Millisecond(v) => v.is_none(),
2142 ScalarValue::Time64Microsecond(v) => v.is_none(),
2143 ScalarValue::Time64Nanosecond(v) => v.is_none(),
2144 ScalarValue::TimestampSecond(v, _) => v.is_none(),
2145 ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2146 ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2147 ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2148 ScalarValue::IntervalYearMonth(v) => v.is_none(),
2149 ScalarValue::IntervalDayTime(v) => v.is_none(),
2150 ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2151 ScalarValue::DurationSecond(v) => v.is_none(),
2152 ScalarValue::DurationMillisecond(v) => v.is_none(),
2153 ScalarValue::DurationMicrosecond(v) => v.is_none(),
2154 ScalarValue::DurationNanosecond(v) => v.is_none(),
2155 ScalarValue::Union(v, _, _) => match v {
2156 Some((_, s)) => s.is_null(),
2157 None => true,
2158 },
2159 ScalarValue::Dictionary(_, v) => v.is_null(),
2160 }
2161 }
2162
2163 pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2171 match (self, other) {
2172 (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2173 (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2174 (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2175 (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2176 (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2177 (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2178 (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2179 (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2180 (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2182 Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2183 }
2184 (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2185 Some((l - r).abs().round() as _)
2186 }
2187 (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2188 Some((l - r).abs().round() as _)
2189 }
2190 (
2191 Self::Decimal128(Some(l), lprecision, lscale),
2192 Self::Decimal128(Some(r), rprecision, rscale),
2193 ) => {
2194 if lprecision == rprecision && lscale == rscale {
2195 l.checked_sub(*r)?.checked_abs()?.to_usize()
2196 } else {
2197 None
2198 }
2199 }
2200 (
2201 Self::Decimal256(Some(l), lprecision, lscale),
2202 Self::Decimal256(Some(r), rprecision, rscale),
2203 ) => {
2204 if lprecision == rprecision && lscale == rscale {
2205 l.checked_sub(*r)?.checked_abs()?.to_usize()
2206 } else {
2207 None
2208 }
2209 }
2210 _ => None,
2211 }
2212 }
2213
2214 pub fn to_array(&self) -> Result<ArrayRef> {
2220 self.to_array_of_size(1)
2221 }
2222
2223 pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2249 Ok(Scalar::new(self.to_array_of_size(1)?))
2250 }
2251
/// Converts an iterator of [`ScalarValue`]s into a single [`ArrayRef`] with
/// one row per scalar.
///
/// The data type of the first scalar (obtained by peeking, so it is not
/// consumed) selects how the array is built. Every scalar is then expected
/// to be the variant that matches that data type.
///
/// # Errors
///
/// Returns an error if:
/// - the iterator is empty,
/// - a scalar is not the variant selected by the first scalar's data type,
/// - a dictionary scalar carries a key type different from the first one,
/// - the data type is one of the unsupported types listed in the final
///   match arm (reported as "not implemented"),
/// - any of the underlying arrow operations (`concat`, decimal precision and
///   scale assignment, fixed size binary construction, ...) fails.
///
/// # Panics
///
/// In the dictionary branch, panics if the array built from the inner values
/// does not have the dictionary's value type, or if the dictionary key type
/// is not one of the eight integer types handled there.
pub fn iter_to_array(
    scalars: impl IntoIterator<Item = ScalarValue>,
) -> Result<ArrayRef> {
    let mut scalars = scalars.into_iter().peekable();

    // Figure out the target type from the first element without consuming it.
    let data_type = match scalars.peek() {
        None => {
            return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
        }
        Some(sv) => sv.data_type(),
    };

    /// Consumes `scalars`, extracting the inner value of each
    /// `ScalarValue::$SCALAR_TY` and collecting them into `$ARRAY_TY`.
    /// Any other variant produces an execution error.
    macro_rules! build_array_primitive {
        ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
            {
                let array = scalars.map(|sv| {
                    if let ScalarValue::$SCALAR_TY(v) = sv {
                        Ok(v)
                    } else {
                        _exec_err!(
                            "Inconsistent types in ScalarValue::iter_to_array. \
                                Expected {:?}, got {:?}",
                            data_type, sv
                        )
                    }
                })
                .collect::<Result<$ARRAY_TY>>()?;
                Arc::new(array)
            }
        }};
    }

    /// Same as `build_array_primitive`, but for two-field variants whose
    /// second field is ignored; the resulting array is given the timezone
    /// `$TZ` taken from the data type.
    macro_rules! build_array_primitive_tz {
        ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
            {
                let array = scalars.map(|sv| {
                    if let ScalarValue::$SCALAR_TY(v, _) = sv {
                        Ok(v)
                    } else {
                        _exec_err!(
                            "Inconsistent types in ScalarValue::iter_to_array. \
                                Expected {:?}, got {:?}",
                            data_type, sv
                        )
                    }
                })
                .collect::<Result<$ARRAY_TY>>()?;
                Arc::new(array.with_timezone_opt($TZ.clone()))
            }
        }};
    }

    /// Consumes `scalars`, extracting the inner value of each
    /// `ScalarValue::$SCALAR_TY` and collecting them into `$ARRAY_TY`.
    /// The expansion is the same as `build_array_primitive`; it is used
    /// below for the string and binary variants.
    macro_rules! build_array_string {
        ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
            {
                let array = scalars.map(|sv| {
                    if let ScalarValue::$SCALAR_TY(v) = sv {
                        Ok(v)
                    } else {
                        _exec_err!(
                            "Inconsistent types in ScalarValue::iter_to_array. \
                                Expected {:?}, got {:?}",
                            data_type, sv
                        )
                    }
                })
                .collect::<Result<$ARRAY_TY>>()?;
                Arc::new(array)
            }
        }};
    }

    let array: ArrayRef = match &data_type {
        // Decimals go through dedicated helpers that apply the precision and
        // scale taken from the first scalar's data type.
        DataType::Decimal32(precision, scale) => {
            let decimal_array =
                ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
            Arc::new(decimal_array)
        }
        DataType::Decimal64(precision, scale) => {
            let decimal_array =
                ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
            Arc::new(decimal_array)
        }
        DataType::Decimal128(precision, scale) => {
            let decimal_array =
                ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
            Arc::new(decimal_array)
        }
        DataType::Decimal256(precision, scale) => {
            let decimal_array =
                ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
            Arc::new(decimal_array)
        }
        DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
        DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
        DataType::Float16 => build_array_primitive!(Float16Array, Float16),
        DataType::Float32 => build_array_primitive!(Float32Array, Float32),
        DataType::Float64 => build_array_primitive!(Float64Array, Float64),
        DataType::Int8 => build_array_primitive!(Int8Array, Int8),
        DataType::Int16 => build_array_primitive!(Int16Array, Int16),
        DataType::Int32 => build_array_primitive!(Int32Array, Int32),
        DataType::Int64 => build_array_primitive!(Int64Array, Int64),
        DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
        DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
        DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
        DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
        DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
        DataType::Utf8 => build_array_string!(StringArray, Utf8),
        DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
        DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
        DataType::Binary => build_array_string!(BinaryArray, Binary),
        DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
        DataType::Date32 => build_array_primitive!(Date32Array, Date32),
        DataType::Date64 => build_array_primitive!(Date64Array, Date64),
        DataType::Time32(TimeUnit::Second) => {
            build_array_primitive!(Time32SecondArray, Time32Second)
        }
        DataType::Time32(TimeUnit::Millisecond) => {
            build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
        }
        DataType::Time64(TimeUnit::Microsecond) => {
            build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
        }
        DataType::Time64(TimeUnit::Nanosecond) => {
            build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
        }
        DataType::Timestamp(TimeUnit::Second, tz) => {
            build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
        }
        DataType::Timestamp(TimeUnit::Millisecond, tz) => {
            build_array_primitive_tz!(
                TimestampMillisecondArray,
                TimestampMillisecond,
                tz
            )
        }
        DataType::Timestamp(TimeUnit::Microsecond, tz) => {
            build_array_primitive_tz!(
                TimestampMicrosecondArray,
                TimestampMicrosecond,
                tz
            )
        }
        DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
            build_array_primitive_tz!(
                TimestampNanosecondArray,
                TimestampNanosecond,
                tz
            )
        }
        DataType::Duration(TimeUnit::Second) => {
            build_array_primitive!(DurationSecondArray, DurationSecond)
        }
        DataType::Duration(TimeUnit::Millisecond) => {
            build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
        }
        DataType::Duration(TimeUnit::Microsecond) => {
            build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
        }
        DataType::Duration(TimeUnit::Nanosecond) => {
            build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
        }
        DataType::Interval(IntervalUnit::DayTime) => {
            build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
        }
        DataType::Interval(IntervalUnit::YearMonth) => {
            build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
        }
        DataType::Interval(IntervalUnit::MonthDayNano) => {
            build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
        }
        DataType::FixedSizeList(_, _) => {
            // Each scalar becomes its own one-row array first.
            let mut arrays =
                scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
            // Use the first array whose row 0 is non-null as the reference
            // for the field and list size.
            let first_non_null_data_type = arrays
                .iter()
                .find(|sv| !sv.is_null(0))
                .map(|sv| sv.data_type().to_owned());
            if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
                // Replace every null entry with a null fixed size list built
                // from the reference field and size before concatenating.
                for array in arrays.iter_mut() {
                    if array.is_null(0) {
                        *array = Arc::new(FixedSizeListArray::new_null(
                            Arc::clone(&f),
                            l,
                            1,
                        ));
                    }
                }
            }
            let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
            arrow::compute::concat(arrays.as_slice())?
        }
        // Nested types: turn each scalar into a one-row array and
        // concatenate the results.
        DataType::List(_)
        | DataType::LargeList(_)
        | DataType::Map(_, _)
        | DataType::Struct(_)
        | DataType::Union(_, _) => {
            let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
            let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
            arrow::compute::concat(arrays.as_slice())?
        }
        DataType::Dictionary(key_type, value_type) => {
            // Unwrap each dictionary scalar to its inner value, checking
            // that the key type matches the one from the first scalar.
            let value_scalars = scalars
                .map(|scalar| match scalar {
                    ScalarValue::Dictionary(inner_key_type, scalar) => {
                        if &inner_key_type == key_type {
                            Ok(*scalar)
                        } else {
                            _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
                        }
                    }
                    _ => {
                        _exec_err!(
                            "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
                        )
                    }
                })
                .collect::<Result<Vec<_>>>()?;

            // Build the values array recursively from the inner scalars.
            let values = Self::iter_to_array(value_scalars)?;
            assert_eq!(values.data_type(), value_type.as_ref());

            // Wrap the values in a dictionary array keyed by `key_type`.
            match key_type.as_ref() {
                DataType::Int8 => dict_from_values::<Int8Type>(values)?,
                DataType::Int16 => dict_from_values::<Int16Type>(values)?,
                DataType::Int32 => dict_from_values::<Int32Type>(values)?,
                DataType::Int64 => dict_from_values::<Int64Type>(values)?,
                DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
                DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
                DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
                DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
                _ => unreachable!("Invalid dictionary keys type: {}", key_type),
            }
        }
        DataType::FixedSizeBinary(size) => {
            // Collect the optional byte values first, then build the array
            // with the width taken from the data type.
            let array = scalars
                .map(|sv| {
                    if let ScalarValue::FixedSizeBinary(_, v) = sv {
                        Ok(v)
                    } else {
                        _exec_err!(
                            "Inconsistent types in ScalarValue::iter_to_array. \
                            Expected {data_type}, got {sv:?}"
                        )
                    }
                })
                .collect::<Result<Vec<_>>>()?;
            let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
                array.into_iter(),
                *size,
            )?;
            Arc::new(array)
        }
        // Unsupported types are listed explicitly; the match has no
        // wildcard arm.
        DataType::Time32(TimeUnit::Microsecond)
        | DataType::Time32(TimeUnit::Nanosecond)
        | DataType::Time64(TimeUnit::Second)
        | DataType::Time64(TimeUnit::Millisecond)
        | DataType::RunEndEncoded(_, _)
        | DataType::ListView(_)
        | DataType::LargeListView(_) => {
            return _not_impl_err!(
                "Unsupported creation of {:?} array from ScalarValue {:?}",
                data_type,
                scalars.peek()
            );
        }
    };
    Ok(array)
}
2561
2562 fn iter_to_null_array(
2563 scalars: impl IntoIterator<Item = ScalarValue>,
2564 ) -> Result<ArrayRef> {
2565 let length = scalars.into_iter().try_fold(
2566 0usize,
2567 |r, element: ScalarValue| match element {
2568 ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2569 s => {
2570 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2571 }
2572 },
2573 )?;
2574 Ok(new_null_array(&DataType::Null, length))
2575 }
2576
2577 fn iter_to_decimal32_array(
2578 scalars: impl IntoIterator<Item = ScalarValue>,
2579 precision: u8,
2580 scale: i8,
2581 ) -> Result<Decimal32Array> {
2582 let array = scalars
2583 .into_iter()
2584 .map(|element: ScalarValue| match element {
2585 ScalarValue::Decimal32(v1, _, _) => Ok(v1),
2586 s => {
2587 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2588 }
2589 })
2590 .collect::<Result<Decimal32Array>>()?
2591 .with_precision_and_scale(precision, scale)?;
2592 Ok(array)
2593 }
2594
2595 fn iter_to_decimal64_array(
2596 scalars: impl IntoIterator<Item = ScalarValue>,
2597 precision: u8,
2598 scale: i8,
2599 ) -> Result<Decimal64Array> {
2600 let array = scalars
2601 .into_iter()
2602 .map(|element: ScalarValue| match element {
2603 ScalarValue::Decimal64(v1, _, _) => Ok(v1),
2604 s => {
2605 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2606 }
2607 })
2608 .collect::<Result<Decimal64Array>>()?
2609 .with_precision_and_scale(precision, scale)?;
2610 Ok(array)
2611 }
2612
2613 fn iter_to_decimal128_array(
2614 scalars: impl IntoIterator<Item = ScalarValue>,
2615 precision: u8,
2616 scale: i8,
2617 ) -> Result<Decimal128Array> {
2618 let array = scalars
2619 .into_iter()
2620 .map(|element: ScalarValue| match element {
2621 ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2622 s => {
2623 _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2624 }
2625 })
2626 .collect::<Result<Decimal128Array>>()?
2627 .with_precision_and_scale(precision, scale)?;
2628 Ok(array)
2629 }
2630
2631 fn iter_to_decimal256_array(
2632 scalars: impl IntoIterator<Item = ScalarValue>,
2633 precision: u8,
2634 scale: i8,
2635 ) -> Result<Decimal256Array> {
2636 let array = scalars
2637 .into_iter()
2638 .map(|element: ScalarValue| match element {
2639 ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2640 s => {
2641 _internal_err!(
2642 "Expected ScalarValue::Decimal256 element. Received {s:?}"
2643 )
2644 }
2645 })
2646 .collect::<Result<Decimal256Array>>()?
2647 .with_precision_and_scale(precision, scale)?;
2648 Ok(array)
2649 }
2650
2651 fn build_decimal32_array(
2652 value: Option<i32>,
2653 precision: u8,
2654 scale: i8,
2655 size: usize,
2656 ) -> Result<Decimal32Array> {
2657 Ok(match value {
2658 Some(val) => Decimal32Array::from(vec![val; size])
2659 .with_precision_and_scale(precision, scale)?,
2660 None => {
2661 let mut builder = Decimal32Array::builder(size)
2662 .with_precision_and_scale(precision, scale)?;
2663 builder.append_nulls(size);
2664 builder.finish()
2665 }
2666 })
2667 }
2668
2669 fn build_decimal64_array(
2670 value: Option<i64>,
2671 precision: u8,
2672 scale: i8,
2673 size: usize,
2674 ) -> Result<Decimal64Array> {
2675 Ok(match value {
2676 Some(val) => Decimal64Array::from(vec![val; size])
2677 .with_precision_and_scale(precision, scale)?,
2678 None => {
2679 let mut builder = Decimal64Array::builder(size)
2680 .with_precision_and_scale(precision, scale)?;
2681 builder.append_nulls(size);
2682 builder.finish()
2683 }
2684 })
2685 }
2686
2687 fn build_decimal128_array(
2688 value: Option<i128>,
2689 precision: u8,
2690 scale: i8,
2691 size: usize,
2692 ) -> Result<Decimal128Array> {
2693 Ok(match value {
2694 Some(val) => Decimal128Array::from(vec![val; size])
2695 .with_precision_and_scale(precision, scale)?,
2696 None => {
2697 let mut builder = Decimal128Array::builder(size)
2698 .with_precision_and_scale(precision, scale)?;
2699 builder.append_nulls(size);
2700 builder.finish()
2701 }
2702 })
2703 }
2704
2705 fn build_decimal256_array(
2706 value: Option<i256>,
2707 precision: u8,
2708 scale: i8,
2709 size: usize,
2710 ) -> Result<Decimal256Array> {
2711 Ok(repeat_n(value, size)
2712 .collect::<Decimal256Array>()
2713 .with_precision_and_scale(precision, scale)?)
2714 }
2715
2716 pub fn new_list(
2743 values: &[ScalarValue],
2744 data_type: &DataType,
2745 nullable: bool,
2746 ) -> Arc<ListArray> {
2747 let values = if values.is_empty() {
2748 new_empty_array(data_type)
2749 } else {
2750 Self::iter_to_array(values.iter().cloned()).unwrap()
2751 };
2752 Arc::new(
2753 SingleRowListArrayBuilder::new(values)
2754 .with_nullable(nullable)
2755 .build_list_array(),
2756 )
2757 }
2758
2759 pub fn new_list_nullable(
2761 values: &[ScalarValue],
2762 data_type: &DataType,
2763 ) -> Arc<ListArray> {
2764 Self::new_list(values, data_type, true)
2765 }
2766
2767 pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2771 let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2772 Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2773 &data_type, null_len,
2774 ))))
2775 }
2776
2777 pub fn new_list_from_iter(
2805 values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2806 data_type: &DataType,
2807 nullable: bool,
2808 ) -> Arc<ListArray> {
2809 let values = if values.len() == 0 {
2810 new_empty_array(data_type)
2811 } else {
2812 Self::iter_to_array(values).unwrap()
2813 };
2814 Arc::new(
2815 SingleRowListArrayBuilder::new(values)
2816 .with_nullable(nullable)
2817 .build_list_array(),
2818 )
2819 }
2820
2821 pub fn new_large_list(
2849 values: &[ScalarValue],
2850 data_type: &DataType,
2851 ) -> Arc<LargeListArray> {
2852 let values = if values.is_empty() {
2853 new_empty_array(data_type)
2854 } else {
2855 Self::iter_to_array(values.iter().cloned()).unwrap()
2856 };
2857 Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2858 }
2859
2860 pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2870 Ok(match self {
2871 ScalarValue::Decimal32(e, precision, scale) => Arc::new(
2872 ScalarValue::build_decimal32_array(*e, *precision, *scale, size)?,
2873 ),
2874 ScalarValue::Decimal64(e, precision, scale) => Arc::new(
2875 ScalarValue::build_decimal64_array(*e, *precision, *scale, size)?,
2876 ),
2877 ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2878 ScalarValue::build_decimal128_array(*e, *precision, *scale, size)?,
2879 ),
2880 ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2881 ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2882 ),
2883 ScalarValue::Boolean(e) => match e {
2884 None => new_null_array(&DataType::Boolean, size),
2885 Some(true) => {
2886 Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
2887 as ArrayRef
2888 }
2889 Some(false) => {
2890 Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
2891 as ArrayRef
2892 }
2893 },
2894 ScalarValue::Float64(e) => {
2895 build_array_from_option!(Float64, Float64Array, e, size)
2896 }
2897 ScalarValue::Float32(e) => {
2898 build_array_from_option!(Float32, Float32Array, e, size)
2899 }
2900 ScalarValue::Float16(e) => {
2901 build_array_from_option!(Float16, Float16Array, e, size)
2902 }
2903 ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2904 ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2905 ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2906 ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2907 ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2908 ScalarValue::UInt16(e) => {
2909 build_array_from_option!(UInt16, UInt16Array, e, size)
2910 }
2911 ScalarValue::UInt32(e) => {
2912 build_array_from_option!(UInt32, UInt32Array, e, size)
2913 }
2914 ScalarValue::UInt64(e) => {
2915 build_array_from_option!(UInt64, UInt64Array, e, size)
2916 }
2917 ScalarValue::TimestampSecond(e, tz_opt) => {
2918 build_timestamp_array_from_option!(
2919 TimeUnit::Second,
2920 tz_opt.clone(),
2921 TimestampSecondArray,
2922 e,
2923 size
2924 )
2925 }
2926 ScalarValue::TimestampMillisecond(e, tz_opt) => {
2927 build_timestamp_array_from_option!(
2928 TimeUnit::Millisecond,
2929 tz_opt.clone(),
2930 TimestampMillisecondArray,
2931 e,
2932 size
2933 )
2934 }
2935
2936 ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2937 build_timestamp_array_from_option!(
2938 TimeUnit::Microsecond,
2939 tz_opt.clone(),
2940 TimestampMicrosecondArray,
2941 e,
2942 size
2943 )
2944 }
2945 ScalarValue::TimestampNanosecond(e, tz_opt) => {
2946 build_timestamp_array_from_option!(
2947 TimeUnit::Nanosecond,
2948 tz_opt.clone(),
2949 TimestampNanosecondArray,
2950 e,
2951 size
2952 )
2953 }
2954 ScalarValue::Utf8(e) => match e {
2955 Some(value) => {
2956 Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2957 }
2958 None => new_null_array(&DataType::Utf8, size),
2959 },
2960 ScalarValue::Utf8View(e) => match e {
2961 Some(value) => {
2962 Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2963 }
2964 None => new_null_array(&DataType::Utf8View, size),
2965 },
2966 ScalarValue::LargeUtf8(e) => match e {
2967 Some(value) => {
2968 Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2969 }
2970 None => new_null_array(&DataType::LargeUtf8, size),
2971 },
2972 ScalarValue::Binary(e) => match e {
2973 Some(value) => Arc::new(
2974 repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2975 ),
2976 None => new_null_array(&DataType::Binary, size),
2977 },
2978 ScalarValue::BinaryView(e) => match e {
2979 Some(value) => Arc::new(
2980 repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2981 ),
2982 None => new_null_array(&DataType::BinaryView, size),
2983 },
2984 ScalarValue::FixedSizeBinary(s, e) => match e {
2985 Some(value) => Arc::new(
2986 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2987 repeat_n(Some(value.as_slice()), size),
2988 *s,
2989 )
2990 .unwrap(),
2991 ),
2992 None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
2993 },
2994 ScalarValue::LargeBinary(e) => match e {
2995 Some(value) => Arc::new(
2996 repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2997 ),
2998 None => new_null_array(&DataType::LargeBinary, size),
2999 },
3000 ScalarValue::List(arr) => {
3001 if size == 1 {
3002 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3003 }
3004 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3005 }
3006 ScalarValue::LargeList(arr) => {
3007 if size == 1 {
3008 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3009 }
3010 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3011 }
3012 ScalarValue::FixedSizeList(arr) => {
3013 if size == 1 {
3014 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3015 }
3016 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3017 }
3018 ScalarValue::Struct(arr) => {
3019 if size == 1 {
3020 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3021 }
3022 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3023 }
3024 ScalarValue::Map(arr) => {
3025 if size == 1 {
3026 return Ok(Arc::clone(arr) as Arc<dyn Array>);
3027 }
3028 Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3029 }
3030 ScalarValue::Date32(e) => {
3031 build_array_from_option!(Date32, Date32Array, e, size)
3032 }
3033 ScalarValue::Date64(e) => {
3034 build_array_from_option!(Date64, Date64Array, e, size)
3035 }
3036 ScalarValue::Time32Second(e) => {
3037 build_array_from_option!(
3038 Time32,
3039 TimeUnit::Second,
3040 Time32SecondArray,
3041 e,
3042 size
3043 )
3044 }
3045 ScalarValue::Time32Millisecond(e) => {
3046 build_array_from_option!(
3047 Time32,
3048 TimeUnit::Millisecond,
3049 Time32MillisecondArray,
3050 e,
3051 size
3052 )
3053 }
3054 ScalarValue::Time64Microsecond(e) => {
3055 build_array_from_option!(
3056 Time64,
3057 TimeUnit::Microsecond,
3058 Time64MicrosecondArray,
3059 e,
3060 size
3061 )
3062 }
3063 ScalarValue::Time64Nanosecond(e) => {
3064 build_array_from_option!(
3065 Time64,
3066 TimeUnit::Nanosecond,
3067 Time64NanosecondArray,
3068 e,
3069 size
3070 )
3071 }
3072 ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3073 Interval,
3074 IntervalUnit::DayTime,
3075 IntervalDayTimeArray,
3076 e,
3077 size
3078 ),
3079 ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3080 Interval,
3081 IntervalUnit::YearMonth,
3082 IntervalYearMonthArray,
3083 e,
3084 size
3085 ),
3086 ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3087 Interval,
3088 IntervalUnit::MonthDayNano,
3089 IntervalMonthDayNanoArray,
3090 e,
3091 size
3092 ),
3093 ScalarValue::DurationSecond(e) => build_array_from_option!(
3094 Duration,
3095 TimeUnit::Second,
3096 DurationSecondArray,
3097 e,
3098 size
3099 ),
3100 ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3101 Duration,
3102 TimeUnit::Millisecond,
3103 DurationMillisecondArray,
3104 e,
3105 size
3106 ),
3107 ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3108 Duration,
3109 TimeUnit::Microsecond,
3110 DurationMicrosecondArray,
3111 e,
3112 size
3113 ),
3114 ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3115 Duration,
3116 TimeUnit::Nanosecond,
3117 DurationNanosecondArray,
3118 e,
3119 size
3120 ),
3121 ScalarValue::Union(value, fields, mode) => match value {
3122 Some((v_id, value)) => {
3123 let mut new_fields = Vec::with_capacity(fields.len());
3124 let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3125 for (f_id, field) in fields.iter() {
3126 let ar = if f_id == *v_id {
3127 value.to_array_of_size(size)?
3128 } else {
3129 let dt = field.data_type();
3130 match mode {
3131 UnionMode::Sparse => new_null_array(dt, size),
3132 UnionMode::Dense => new_null_array(dt, 0),
3135 }
3136 };
3137 let field = (**field).clone();
3138 child_arrays.push(ar);
3139 new_fields.push(field.clone());
3140 }
3141 let type_ids = repeat_n(*v_id, size);
3142 let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3143 let value_offsets = match mode {
3144 UnionMode::Sparse => None,
3145 UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3146 };
3147 let ar = UnionArray::try_new(
3148 fields.clone(),
3149 type_ids,
3150 value_offsets,
3151 child_arrays,
3152 )
3153 .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3154 Arc::new(ar)
3155 }
3156 None => {
3157 let dt = self.data_type();
3158 new_null_array(&dt, size)
3159 }
3160 },
3161 ScalarValue::Dictionary(key_type, v) => {
3162 match key_type.as_ref() {
3164 DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3165 DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3166 DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3167 DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3168 DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3169 DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3170 DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3171 DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3172 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3173 }
3174 }
3175 ScalarValue::Null => get_or_create_cached_null_array(size),
3176 })
3177 }
3178
3179 fn get_decimal_value_from_array(
3180 array: &dyn Array,
3181 index: usize,
3182 precision: u8,
3183 scale: i8,
3184 ) -> Result<ScalarValue> {
3185 match array.data_type() {
3186 DataType::Decimal32(_, _) => {
3187 let array = as_decimal32_array(array)?;
3188 if array.is_null(index) {
3189 Ok(ScalarValue::Decimal32(None, precision, scale))
3190 } else {
3191 let value = array.value(index);
3192 Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3193 }
3194 }
3195 DataType::Decimal64(_, _) => {
3196 let array = as_decimal64_array(array)?;
3197 if array.is_null(index) {
3198 Ok(ScalarValue::Decimal64(None, precision, scale))
3199 } else {
3200 let value = array.value(index);
3201 Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3202 }
3203 }
3204 DataType::Decimal128(_, _) => {
3205 let array = as_decimal128_array(array)?;
3206 if array.is_null(index) {
3207 Ok(ScalarValue::Decimal128(None, precision, scale))
3208 } else {
3209 let value = array.value(index);
3210 Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3211 }
3212 }
3213 DataType::Decimal256(_, _) => {
3214 let array = as_decimal256_array(array)?;
3215 if array.is_null(index) {
3216 Ok(ScalarValue::Decimal256(None, precision, scale))
3217 } else {
3218 let value = array.value(index);
3219 Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3220 }
3221 }
3222 other => {
3223 unreachable!("Invalid type isn't decimal: {other:?}")
3224 }
3225 }
3226 }
3227
3228 fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3229 let arrays = repeat_n(arr, size).collect::<Vec<_>>();
3230 let ret = match !arrays.is_empty() {
3231 true => arrow::compute::concat(arrays.as_slice())?,
3232 false => arr.slice(0, 0),
3233 };
3234 Ok(ret)
3235 }
3236
3237 pub fn convert_array_to_scalar_vec(
3339 array: &dyn Array,
3340 ) -> Result<Vec<Option<Vec<Self>>>> {
3341 fn generic_collect<OffsetSize: OffsetSizeTrait>(
3342 array: &dyn Array,
3343 ) -> Result<Vec<Option<Vec<ScalarValue>>>> {
3344 array
3345 .as_list::<OffsetSize>()
3346 .iter()
3347 .map(|nested_array| {
3348 nested_array
3349 .map(|array| {
3350 (0..array.len())
3351 .map(|i| ScalarValue::try_from_array(&array, i))
3352 .collect::<Result<Vec<_>>>()
3353 })
3354 .transpose()
3355 })
3356 .collect()
3357 }
3358
3359 match array.data_type() {
3360 DataType::List(_) => generic_collect::<i32>(array),
3361 DataType::LargeList(_) => generic_collect::<i64>(array),
3362 _ => _internal_err!(
3363 "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList type"
3364 ),
3365 }
3366 }
3367
3368 #[deprecated(
3369 since = "46.0.0",
3370 note = "This function is obsolete. Use `to_array` instead"
3371 )]
3372 pub fn raw_data(&self) -> Result<ArrayRef> {
3373 match self {
3374 ScalarValue::List(arr) => Ok(arr.to_owned()),
3375 _ => _internal_err!("ScalarValue is not a list"),
3376 }
3377 }
3378
3379 pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3381 if !array.is_valid(index) {
3383 return array.data_type().try_into();
3384 }
3385
3386 Ok(match array.data_type() {
3387 DataType::Null => ScalarValue::Null,
3388 DataType::Decimal32(precision, scale) => {
3389 ScalarValue::get_decimal_value_from_array(
3390 array, index, *precision, *scale,
3391 )?
3392 }
3393 DataType::Decimal64(precision, scale) => {
3394 ScalarValue::get_decimal_value_from_array(
3395 array, index, *precision, *scale,
3396 )?
3397 }
3398 DataType::Decimal128(precision, scale) => {
3399 ScalarValue::get_decimal_value_from_array(
3400 array, index, *precision, *scale,
3401 )?
3402 }
3403 DataType::Decimal256(precision, scale) => {
3404 ScalarValue::get_decimal_value_from_array(
3405 array, index, *precision, *scale,
3406 )?
3407 }
3408 DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3409 DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3410 DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3411 DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3412 DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3413 DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3414 DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3415 DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3416 DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3417 DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3418 DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3419 DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3420 DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3421 DataType::LargeBinary => {
3422 typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3423 }
3424 DataType::BinaryView => {
3425 typed_cast!(array, index, as_binary_view_array, BinaryView)?
3426 }
3427 DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3428 DataType::LargeUtf8 => {
3429 typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3430 }
3431 DataType::Utf8View => {
3432 typed_cast!(array, index, as_string_view_array, Utf8View)?
3433 }
3434 DataType::List(field) => {
3435 let list_array = array.as_list::<i32>();
3436 let nested_array = list_array.value(index);
3437 SingleRowListArrayBuilder::new(nested_array)
3439 .with_field(field)
3440 .build_list_scalar()
3441 }
3442 DataType::LargeList(field) => {
3443 let list_array = as_large_list_array(array)?;
3444 let nested_array = list_array.value(index);
3445 SingleRowListArrayBuilder::new(nested_array)
3447 .with_field(field)
3448 .build_large_list_scalar()
3449 }
3450 DataType::FixedSizeList(field, _) => {
3452 let list_array = as_fixed_size_list_array(array)?;
3453 let nested_array = list_array.value(index);
3454 let list_size = nested_array.len();
3456 SingleRowListArrayBuilder::new(nested_array)
3457 .with_field(field)
3458 .build_fixed_size_list_scalar(list_size)
3459 }
3460 DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
3461 DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
3462 DataType::Time32(TimeUnit::Second) => {
3463 typed_cast!(array, index, as_time32_second_array, Time32Second)?
3464 }
3465 DataType::Time32(TimeUnit::Millisecond) => {
3466 typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
3467 }
3468 DataType::Time64(TimeUnit::Microsecond) => {
3469 typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
3470 }
3471 DataType::Time64(TimeUnit::Nanosecond) => {
3472 typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
3473 }
3474 DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
3475 array,
3476 index,
3477 as_timestamp_second_array,
3478 TimestampSecond,
3479 tz_opt
3480 )?,
3481 DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
3482 array,
3483 index,
3484 as_timestamp_millisecond_array,
3485 TimestampMillisecond,
3486 tz_opt
3487 )?,
3488 DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
3489 array,
3490 index,
3491 as_timestamp_microsecond_array,
3492 TimestampMicrosecond,
3493 tz_opt
3494 )?,
3495 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
3496 array,
3497 index,
3498 as_timestamp_nanosecond_array,
3499 TimestampNanosecond,
3500 tz_opt
3501 )?,
3502 DataType::Dictionary(key_type, _) => {
3503 let (values_array, values_index) = match key_type.as_ref() {
3504 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3505 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3506 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3507 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3508 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3509 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3510 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3511 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3512 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3513 };
3514 let value = match values_index {
3516 Some(values_index) => {
3517 ScalarValue::try_from_array(values_array, values_index)
3518 }
3519 None => values_array.data_type().try_into(),
3521 }?;
3522
3523 Self::Dictionary(key_type.clone(), Box::new(value))
3524 }
3525 DataType::Struct(_) => {
3526 let a = array.slice(index, 1);
3527 Self::Struct(Arc::new(a.as_struct().to_owned()))
3528 }
3529 DataType::FixedSizeBinary(_) => {
3530 let array = as_fixed_size_binary_array(array)?;
3531 let size = match array.data_type() {
3532 DataType::FixedSizeBinary(size) => *size,
3533 _ => unreachable!(),
3534 };
3535 ScalarValue::FixedSizeBinary(
3536 size,
3537 match array.is_null(index) {
3538 true => None,
3539 false => Some(array.value(index).into()),
3540 },
3541 )
3542 }
3543 DataType::Interval(IntervalUnit::DayTime) => {
3544 typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
3545 }
3546 DataType::Interval(IntervalUnit::YearMonth) => {
3547 typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
3548 }
3549 DataType::Interval(IntervalUnit::MonthDayNano) => {
3550 typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
3551 }
3552
3553 DataType::Duration(TimeUnit::Second) => {
3554 typed_cast!(array, index, as_duration_second_array, DurationSecond)?
3555 }
3556 DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
3557 array,
3558 index,
3559 as_duration_millisecond_array,
3560 DurationMillisecond
3561 )?,
3562 DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
3563 array,
3564 index,
3565 as_duration_microsecond_array,
3566 DurationMicrosecond
3567 )?,
3568 DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
3569 array,
3570 index,
3571 as_duration_nanosecond_array,
3572 DurationNanosecond
3573 )?,
3574 DataType::Map(_, _) => {
3575 let a = array.slice(index, 1);
3576 Self::Map(Arc::new(a.as_map().to_owned()))
3577 }
3578 DataType::Union(fields, mode) => {
3579 let array = as_union_array(array)?;
3580 let ti = array.type_id(index);
3581 let index = array.value_offset(index);
3582 let value = ScalarValue::try_from_array(array.child(ti), index)?;
3583 ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
3584 }
3585 other => {
3586 return _not_impl_err!(
3587 "Can't create a scalar from array of type \"{other:?}\""
3588 );
3589 }
3590 })
3591 }
3592
3593 pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3595 ScalarValue::from(value).cast_to(target_type)
3596 }
3597
3598 pub fn try_as_str(&self) -> Option<Option<&str>> {
3632 let v = match self {
3633 ScalarValue::Utf8(v) => v,
3634 ScalarValue::LargeUtf8(v) => v,
3635 ScalarValue::Utf8View(v) => v,
3636 ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3637 _ => return None,
3638 };
3639 Some(v.as_ref().map(|v| v.as_str()))
3640 }
3641
3642 pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3644 self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3645 }
3646
3647 pub fn cast_to_with_options(
3649 &self,
3650 target_type: &DataType,
3651 cast_options: &CastOptions<'static>,
3652 ) -> Result<Self> {
3653 let scalar_array = self.to_array()?;
3654 let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3655 ScalarValue::try_from_array(&cast_arr, 0)
3656 }
3657
3658 fn eq_array_decimal32(
3659 array: &ArrayRef,
3660 index: usize,
3661 value: Option<&i32>,
3662 precision: u8,
3663 scale: i8,
3664 ) -> Result<bool> {
3665 let array = as_decimal32_array(array)?;
3666 if array.precision() != precision || array.scale() != scale {
3667 return Ok(false);
3668 }
3669 let is_null = array.is_null(index);
3670 if let Some(v) = value {
3671 Ok(!array.is_null(index) && array.value(index) == *v)
3672 } else {
3673 Ok(is_null)
3674 }
3675 }
3676
3677 fn eq_array_decimal64(
3678 array: &ArrayRef,
3679 index: usize,
3680 value: Option<&i64>,
3681 precision: u8,
3682 scale: i8,
3683 ) -> Result<bool> {
3684 let array = as_decimal64_array(array)?;
3685 if array.precision() != precision || array.scale() != scale {
3686 return Ok(false);
3687 }
3688 let is_null = array.is_null(index);
3689 if let Some(v) = value {
3690 Ok(!array.is_null(index) && array.value(index) == *v)
3691 } else {
3692 Ok(is_null)
3693 }
3694 }
3695
3696 fn eq_array_decimal(
3697 array: &ArrayRef,
3698 index: usize,
3699 value: Option<&i128>,
3700 precision: u8,
3701 scale: i8,
3702 ) -> Result<bool> {
3703 let array = as_decimal128_array(array)?;
3704 if array.precision() != precision || array.scale() != scale {
3705 return Ok(false);
3706 }
3707 let is_null = array.is_null(index);
3708 if let Some(v) = value {
3709 Ok(!array.is_null(index) && array.value(index) == *v)
3710 } else {
3711 Ok(is_null)
3712 }
3713 }
3714
3715 fn eq_array_decimal256(
3716 array: &ArrayRef,
3717 index: usize,
3718 value: Option<&i256>,
3719 precision: u8,
3720 scale: i8,
3721 ) -> Result<bool> {
3722 let array = as_decimal256_array(array)?;
3723 if array.precision() != precision || array.scale() != scale {
3724 return Ok(false);
3725 }
3726 let is_null = array.is_null(index);
3727 if let Some(v) = value {
3728 Ok(!array.is_null(index) && array.value(index) == *v)
3729 } else {
3730 Ok(is_null)
3731 }
3732 }
3733
3734 #[inline]
3761 pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
3762 Ok(match self {
3763 ScalarValue::Decimal32(v, precision, scale) => {
3764 ScalarValue::eq_array_decimal32(
3765 array,
3766 index,
3767 v.as_ref(),
3768 *precision,
3769 *scale,
3770 )?
3771 }
3772 ScalarValue::Decimal64(v, precision, scale) => {
3773 ScalarValue::eq_array_decimal64(
3774 array,
3775 index,
3776 v.as_ref(),
3777 *precision,
3778 *scale,
3779 )?
3780 }
3781 ScalarValue::Decimal128(v, precision, scale) => {
3782 ScalarValue::eq_array_decimal(
3783 array,
3784 index,
3785 v.as_ref(),
3786 *precision,
3787 *scale,
3788 )?
3789 }
3790 ScalarValue::Decimal256(v, precision, scale) => {
3791 ScalarValue::eq_array_decimal256(
3792 array,
3793 index,
3794 v.as_ref(),
3795 *precision,
3796 *scale,
3797 )?
3798 }
3799 ScalarValue::Boolean(val) => {
3800 eq_array_primitive!(array, index, as_boolean_array, val)?
3801 }
3802 ScalarValue::Float16(val) => {
3803 eq_array_primitive!(array, index, as_float16_array, val)?
3804 }
3805 ScalarValue::Float32(val) => {
3806 eq_array_primitive!(array, index, as_float32_array, val)?
3807 }
3808 ScalarValue::Float64(val) => {
3809 eq_array_primitive!(array, index, as_float64_array, val)?
3810 }
3811 ScalarValue::Int8(val) => {
3812 eq_array_primitive!(array, index, as_int8_array, val)?
3813 }
3814 ScalarValue::Int16(val) => {
3815 eq_array_primitive!(array, index, as_int16_array, val)?
3816 }
3817 ScalarValue::Int32(val) => {
3818 eq_array_primitive!(array, index, as_int32_array, val)?
3819 }
3820 ScalarValue::Int64(val) => {
3821 eq_array_primitive!(array, index, as_int64_array, val)?
3822 }
3823 ScalarValue::UInt8(val) => {
3824 eq_array_primitive!(array, index, as_uint8_array, val)?
3825 }
3826 ScalarValue::UInt16(val) => {
3827 eq_array_primitive!(array, index, as_uint16_array, val)?
3828 }
3829 ScalarValue::UInt32(val) => {
3830 eq_array_primitive!(array, index, as_uint32_array, val)?
3831 }
3832 ScalarValue::UInt64(val) => {
3833 eq_array_primitive!(array, index, as_uint64_array, val)?
3834 }
3835 ScalarValue::Utf8(val) => {
3836 eq_array_primitive!(array, index, as_string_array, val)?
3837 }
3838 ScalarValue::Utf8View(val) => {
3839 eq_array_primitive!(array, index, as_string_view_array, val)?
3840 }
3841 ScalarValue::LargeUtf8(val) => {
3842 eq_array_primitive!(array, index, as_large_string_array, val)?
3843 }
3844 ScalarValue::Binary(val) => {
3845 eq_array_primitive!(array, index, as_binary_array, val)?
3846 }
3847 ScalarValue::BinaryView(val) => {
3848 eq_array_primitive!(array, index, as_binary_view_array, val)?
3849 }
3850 ScalarValue::FixedSizeBinary(_, val) => {
3851 eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
3852 }
3853 ScalarValue::LargeBinary(val) => {
3854 eq_array_primitive!(array, index, as_large_binary_array, val)?
3855 }
3856 ScalarValue::List(arr) => {
3857 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3858 }
3859 ScalarValue::LargeList(arr) => {
3860 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3861 }
3862 ScalarValue::FixedSizeList(arr) => {
3863 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3864 }
3865 ScalarValue::Struct(arr) => {
3866 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3867 }
3868 ScalarValue::Map(arr) => {
3869 Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3870 }
3871 ScalarValue::Date32(val) => {
3872 eq_array_primitive!(array, index, as_date32_array, val)?
3873 }
3874 ScalarValue::Date64(val) => {
3875 eq_array_primitive!(array, index, as_date64_array, val)?
3876 }
3877 ScalarValue::Time32Second(val) => {
3878 eq_array_primitive!(array, index, as_time32_second_array, val)?
3879 }
3880 ScalarValue::Time32Millisecond(val) => {
3881 eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
3882 }
3883 ScalarValue::Time64Microsecond(val) => {
3884 eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
3885 }
3886 ScalarValue::Time64Nanosecond(val) => {
3887 eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
3888 }
3889 ScalarValue::TimestampSecond(val, _) => {
3890 eq_array_primitive!(array, index, as_timestamp_second_array, val)?
3891 }
3892 ScalarValue::TimestampMillisecond(val, _) => {
3893 eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
3894 }
3895 ScalarValue::TimestampMicrosecond(val, _) => {
3896 eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
3897 }
3898 ScalarValue::TimestampNanosecond(val, _) => {
3899 eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
3900 }
3901 ScalarValue::IntervalYearMonth(val) => {
3902 eq_array_primitive!(array, index, as_interval_ym_array, val)?
3903 }
3904 ScalarValue::IntervalDayTime(val) => {
3905 eq_array_primitive!(array, index, as_interval_dt_array, val)?
3906 }
3907 ScalarValue::IntervalMonthDayNano(val) => {
3908 eq_array_primitive!(array, index, as_interval_mdn_array, val)?
3909 }
3910 ScalarValue::DurationSecond(val) => {
3911 eq_array_primitive!(array, index, as_duration_second_array, val)?
3912 }
3913 ScalarValue::DurationMillisecond(val) => {
3914 eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
3915 }
3916 ScalarValue::DurationMicrosecond(val) => {
3917 eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
3918 }
3919 ScalarValue::DurationNanosecond(val) => {
3920 eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
3921 }
3922 ScalarValue::Union(value, _, _) => {
3923 let array = as_union_array(array)?;
3924 let ti = array.type_id(index);
3925 let index = array.value_offset(index);
3926 if let Some((ti_v, value)) = value {
3927 ti_v == &ti && value.eq_array(array.child(ti), index)?
3928 } else {
3929 array.child(ti).is_null(index)
3930 }
3931 }
3932 ScalarValue::Dictionary(key_type, v) => {
3933 let (values_array, values_index) = match key_type.as_ref() {
3934 DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3935 DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3936 DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3937 DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3938 DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3939 DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3940 DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3941 DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3942 _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3943 };
3944 match values_index {
3946 Some(values_index) => v.eq_array(values_array, values_index)?,
3947 None => v.is_null(),
3948 }
3949 }
3950 ScalarValue::Null => array.is_null(index),
3951 })
3952 }
3953
3954 fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3955 let right = arr2.slice(index, 1);
3956 arr1 == &right
3957 }
3958
3959 pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
3964 self.partial_cmp(other).ok_or_else(|| {
3965 _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
3966 })
3967 }
3968
3969 pub fn size(&self) -> usize {
3972 size_of_val(self)
3973 + match self {
3974 ScalarValue::Null
3975 | ScalarValue::Boolean(_)
3976 | ScalarValue::Float16(_)
3977 | ScalarValue::Float32(_)
3978 | ScalarValue::Float64(_)
3979 | ScalarValue::Decimal32(_, _, _)
3980 | ScalarValue::Decimal64(_, _, _)
3981 | ScalarValue::Decimal128(_, _, _)
3982 | ScalarValue::Decimal256(_, _, _)
3983 | ScalarValue::Int8(_)
3984 | ScalarValue::Int16(_)
3985 | ScalarValue::Int32(_)
3986 | ScalarValue::Int64(_)
3987 | ScalarValue::UInt8(_)
3988 | ScalarValue::UInt16(_)
3989 | ScalarValue::UInt32(_)
3990 | ScalarValue::UInt64(_)
3991 | ScalarValue::Date32(_)
3992 | ScalarValue::Date64(_)
3993 | ScalarValue::Time32Second(_)
3994 | ScalarValue::Time32Millisecond(_)
3995 | ScalarValue::Time64Microsecond(_)
3996 | ScalarValue::Time64Nanosecond(_)
3997 | ScalarValue::IntervalYearMonth(_)
3998 | ScalarValue::IntervalDayTime(_)
3999 | ScalarValue::IntervalMonthDayNano(_)
4000 | ScalarValue::DurationSecond(_)
4001 | ScalarValue::DurationMillisecond(_)
4002 | ScalarValue::DurationMicrosecond(_)
4003 | ScalarValue::DurationNanosecond(_) => 0,
4004 ScalarValue::Utf8(s)
4005 | ScalarValue::LargeUtf8(s)
4006 | ScalarValue::Utf8View(s) => {
4007 s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4008 }
4009 ScalarValue::TimestampSecond(_, s)
4010 | ScalarValue::TimestampMillisecond(_, s)
4011 | ScalarValue::TimestampMicrosecond(_, s)
4012 | ScalarValue::TimestampNanosecond(_, s) => {
4013 s.as_ref().map(|s| s.len()).unwrap_or_default()
4014 }
4015 ScalarValue::Binary(b)
4016 | ScalarValue::FixedSizeBinary(_, b)
4017 | ScalarValue::LargeBinary(b)
4018 | ScalarValue::BinaryView(b) => {
4019 b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4020 }
4021 ScalarValue::List(arr) => arr.get_array_memory_size(),
4022 ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4023 ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4024 ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4025 ScalarValue::Map(arr) => arr.get_array_memory_size(),
4026 ScalarValue::Union(vals, fields, _mode) => {
4027 vals.as_ref()
4028 .map(|(_id, sv)| sv.size() - size_of_val(sv))
4029 .unwrap_or_default()
4030 + size_of_val(fields)
4032 + (size_of::<Field>() * fields.len())
4033 + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4034 }
4035 ScalarValue::Dictionary(dt, sv) => {
4036 dt.size() + sv.size()
4038 }
4039 }
4040 }
4041
4042 pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4046 size_of_val(vec)
4047 + (size_of::<ScalarValue>() * vec.capacity())
4048 + vec
4049 .iter()
4050 .map(|sv| sv.size() - size_of_val(sv))
4051 .sum::<usize>()
4052 }
4053
4054 pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4058 size_of_val(vec_deque)
4059 + (size_of::<ScalarValue>() * vec_deque.capacity())
4060 + vec_deque
4061 .iter()
4062 .map(|sv| sv.size() - size_of_val(sv))
4063 .sum::<usize>()
4064 }
4065
4066 pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4070 size_of_val(set)
4071 + (size_of::<ScalarValue>() * set.capacity())
4072 + set
4073 .iter()
4074 .map(|sv| sv.size() - size_of_val(sv))
4075 .sum::<usize>()
4076 }
4077
4078 pub fn compact(&mut self) {
4084 match self {
4085 ScalarValue::Null
4086 | ScalarValue::Boolean(_)
4087 | ScalarValue::Float16(_)
4088 | ScalarValue::Float32(_)
4089 | ScalarValue::Float64(_)
4090 | ScalarValue::Decimal32(_, _, _)
4091 | ScalarValue::Decimal64(_, _, _)
4092 | ScalarValue::Decimal128(_, _, _)
4093 | ScalarValue::Decimal256(_, _, _)
4094 | ScalarValue::Int8(_)
4095 | ScalarValue::Int16(_)
4096 | ScalarValue::Int32(_)
4097 | ScalarValue::Int64(_)
4098 | ScalarValue::UInt8(_)
4099 | ScalarValue::UInt16(_)
4100 | ScalarValue::UInt32(_)
4101 | ScalarValue::UInt64(_)
4102 | ScalarValue::Date32(_)
4103 | ScalarValue::Date64(_)
4104 | ScalarValue::Time32Second(_)
4105 | ScalarValue::Time32Millisecond(_)
4106 | ScalarValue::Time64Microsecond(_)
4107 | ScalarValue::Time64Nanosecond(_)
4108 | ScalarValue::IntervalYearMonth(_)
4109 | ScalarValue::IntervalDayTime(_)
4110 | ScalarValue::IntervalMonthDayNano(_)
4111 | ScalarValue::DurationSecond(_)
4112 | ScalarValue::DurationMillisecond(_)
4113 | ScalarValue::DurationMicrosecond(_)
4114 | ScalarValue::DurationNanosecond(_)
4115 | ScalarValue::Utf8(_)
4116 | ScalarValue::LargeUtf8(_)
4117 | ScalarValue::Utf8View(_)
4118 | ScalarValue::TimestampSecond(_, _)
4119 | ScalarValue::TimestampMillisecond(_, _)
4120 | ScalarValue::TimestampMicrosecond(_, _)
4121 | ScalarValue::TimestampNanosecond(_, _)
4122 | ScalarValue::Binary(_)
4123 | ScalarValue::FixedSizeBinary(_, _)
4124 | ScalarValue::LargeBinary(_)
4125 | ScalarValue::BinaryView(_) => (),
4126 ScalarValue::FixedSizeList(arr) => {
4127 let array = copy_array_data(&arr.to_data());
4128 *Arc::make_mut(arr) = FixedSizeListArray::from(array);
4129 }
4130 ScalarValue::List(arr) => {
4131 let array = copy_array_data(&arr.to_data());
4132 *Arc::make_mut(arr) = ListArray::from(array);
4133 }
4134 ScalarValue::LargeList(arr) => {
4135 let array = copy_array_data(&arr.to_data());
4136 *Arc::make_mut(arr) = LargeListArray::from(array)
4137 }
4138 ScalarValue::Struct(arr) => {
4139 let array = copy_array_data(&arr.to_data());
4140 *Arc::make_mut(arr) = StructArray::from(array);
4141 }
4142 ScalarValue::Map(arr) => {
4143 let array = copy_array_data(&arr.to_data());
4144 *Arc::make_mut(arr) = MapArray::from(array);
4145 }
4146 ScalarValue::Union(val, _, _) => {
4147 if let Some((_, value)) = val.as_mut() {
4148 value.compact();
4149 }
4150 }
4151 ScalarValue::Dictionary(_, value) => {
4152 value.compact();
4153 }
4154 }
4155 }
4156
4157 pub fn compacted(mut self) -> Self {
4159 self.compact();
4160 self
4161 }
4162
4163 pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4178 match datatype {
4179 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4180 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4181 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4182 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4183 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4184 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4185 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4186 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4187 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4188 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4189 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4190 DataType::Decimal128(precision, scale) => {
4191 let max_digits = 10_i128.pow(*precision as u32) - 1;
4194 Some(ScalarValue::Decimal128(
4195 Some(-max_digits),
4196 *precision,
4197 *scale,
4198 ))
4199 }
4200 DataType::Decimal256(precision, scale) => {
4201 let max_digits = i256::from_i128(10_i128)
4204 .checked_pow(*precision as u32)
4205 .and_then(|v| v.checked_sub(i256::from_i128(1)))
4206 .unwrap_or(i256::MAX);
4207 Some(ScalarValue::Decimal256(
4208 Some(max_digits.neg_wrapping()),
4209 *precision,
4210 *scale,
4211 ))
4212 }
4213 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4214 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4215 DataType::Time32(TimeUnit::Second) => {
4216 Some(ScalarValue::Time32Second(Some(0)))
4217 }
4218 DataType::Time32(TimeUnit::Millisecond) => {
4219 Some(ScalarValue::Time32Millisecond(Some(0)))
4220 }
4221 DataType::Time64(TimeUnit::Microsecond) => {
4222 Some(ScalarValue::Time64Microsecond(Some(0)))
4223 }
4224 DataType::Time64(TimeUnit::Nanosecond) => {
4225 Some(ScalarValue::Time64Nanosecond(Some(0)))
4226 }
4227 DataType::Timestamp(unit, tz) => match unit {
4228 TimeUnit::Second => {
4229 Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4230 }
4231 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4232 Some(i64::MIN),
4233 tz.clone(),
4234 )),
4235 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4236 Some(i64::MIN),
4237 tz.clone(),
4238 )),
4239 TimeUnit::Nanosecond => {
4240 Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4241 }
4242 },
4243 DataType::Duration(unit) => match unit {
4244 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4245 TimeUnit::Millisecond => {
4246 Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4247 }
4248 TimeUnit::Microsecond => {
4249 Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4250 }
4251 TimeUnit::Nanosecond => {
4252 Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4253 }
4254 },
4255 _ => None,
4256 }
4257 }
4258
4259 pub fn max(datatype: &DataType) -> Option<ScalarValue> {
4274 match datatype {
4275 DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
4276 DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
4277 DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
4278 DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
4279 DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
4280 DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
4281 DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
4282 DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
4283 DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
4284 DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
4285 DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
4286 DataType::Decimal128(precision, scale) => {
4287 let max_digits = 10_i128.pow(*precision as u32) - 1;
4290 Some(ScalarValue::Decimal128(
4291 Some(max_digits),
4292 *precision,
4293 *scale,
4294 ))
4295 }
4296 DataType::Decimal256(precision, scale) => {
4297 let max_digits = i256::from_i128(10_i128)
4299 .checked_pow(*precision as u32)
4300 .and_then(|v| v.checked_sub(i256::from_i128(1)))
4301 .unwrap_or(i256::MAX);
4302 Some(ScalarValue::Decimal256(
4303 Some(max_digits),
4304 *precision,
4305 *scale,
4306 ))
4307 }
4308 DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
4309 DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
4310 DataType::Time32(TimeUnit::Second) => {
4311 Some(ScalarValue::Time32Second(Some(86_399)))
4313 }
4314 DataType::Time32(TimeUnit::Millisecond) => {
4315 Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
4317 }
4318 DataType::Time64(TimeUnit::Microsecond) => {
4319 Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
4321 }
4322 DataType::Time64(TimeUnit::Nanosecond) => {
4323 Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
4325 }
4326 DataType::Timestamp(unit, tz) => match unit {
4327 TimeUnit::Second => {
4328 Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
4329 }
4330 TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4331 Some(i64::MAX),
4332 tz.clone(),
4333 )),
4334 TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4335 Some(i64::MAX),
4336 tz.clone(),
4337 )),
4338 TimeUnit::Nanosecond => {
4339 Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
4340 }
4341 },
4342 DataType::Duration(unit) => match unit {
4343 TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
4344 TimeUnit::Millisecond => {
4345 Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
4346 }
4347 TimeUnit::Microsecond => {
4348 Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
4349 }
4350 TimeUnit::Nanosecond => {
4351 Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
4352 }
4353 },
4354 _ => None,
4355 }
4356 }
4357}
4358
4359pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
4387 let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
4388 copy.extend(0, 0, src_data.len());
4389 copy.freeze()
4390}
4391
4392macro_rules! impl_scalar {
4393 ($ty:ty, $scalar:tt) => {
4394 impl From<$ty> for ScalarValue {
4395 fn from(value: $ty) -> Self {
4396 ScalarValue::$scalar(Some(value))
4397 }
4398 }
4399
4400 impl From<Option<$ty>> for ScalarValue {
4401 fn from(value: Option<$ty>) -> Self {
4402 ScalarValue::$scalar(value)
4403 }
4404 }
4405 };
4406}
4407
4408impl_scalar!(f64, Float64);
4409impl_scalar!(f32, Float32);
4410impl_scalar!(i8, Int8);
4411impl_scalar!(i16, Int16);
4412impl_scalar!(i32, Int32);
4413impl_scalar!(i64, Int64);
4414impl_scalar!(bool, Boolean);
4415impl_scalar!(u8, UInt8);
4416impl_scalar!(u16, UInt16);
4417impl_scalar!(u32, UInt32);
4418impl_scalar!(u64, UInt64);
4419
4420impl From<&str> for ScalarValue {
4421 fn from(value: &str) -> Self {
4422 Some(value).into()
4423 }
4424}
4425
4426impl From<Option<&str>> for ScalarValue {
4427 fn from(value: Option<&str>) -> Self {
4428 let value = value.map(|s| s.to_string());
4429 value.into()
4430 }
4431}
4432
4433impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
4435 fn from(value: Vec<(&str, ScalarValue)>) -> Self {
4436 value
4437 .into_iter()
4438 .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
4439 builder.with_name_and_scalar(name, value)
4440 })
4441 .build()
4442 .unwrap()
4443 }
4444}
4445
4446impl FromStr for ScalarValue {
4447 type Err = Infallible;
4448
4449 fn from_str(s: &str) -> Result<Self, Self::Err> {
4450 Ok(s.into())
4451 }
4452}
4453
4454impl From<String> for ScalarValue {
4455 fn from(value: String) -> Self {
4456 Some(value).into()
4457 }
4458}
4459
4460impl From<Option<String>> for ScalarValue {
4461 fn from(value: Option<String>) -> Self {
4462 ScalarValue::Utf8(value)
4463 }
4464}
4465
4466macro_rules! impl_try_from {
4467 ($SCALAR:ident, $NATIVE:ident) => {
4468 impl TryFrom<ScalarValue> for $NATIVE {
4469 type Error = DataFusionError;
4470
4471 fn try_from(value: ScalarValue) -> Result<Self> {
4472 match value {
4473 ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
4474 _ => _internal_err!(
4475 "Cannot convert {:?} to {}",
4476 value,
4477 std::any::type_name::<Self>()
4478 ),
4479 }
4480 }
4481 }
4482 };
4483}
4484
4485impl_try_from!(Int8, i8);
4486impl_try_from!(Int16, i16);
4487
4488impl TryFrom<ScalarValue> for i32 {
4490 type Error = DataFusionError;
4491
4492 fn try_from(value: ScalarValue) -> Result<Self> {
4493 match value {
4494 ScalarValue::Int32(Some(inner_value))
4495 | ScalarValue::Date32(Some(inner_value))
4496 | ScalarValue::Time32Second(Some(inner_value))
4497 | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
4498 _ => _internal_err!(
4499 "Cannot convert {:?} to {}",
4500 value,
4501 std::any::type_name::<Self>()
4502 ),
4503 }
4504 }
4505}
4506
4507impl TryFrom<ScalarValue> for i64 {
4509 type Error = DataFusionError;
4510
4511 fn try_from(value: ScalarValue) -> Result<Self> {
4512 match value {
4513 ScalarValue::Int64(Some(inner_value))
4514 | ScalarValue::Date64(Some(inner_value))
4515 | ScalarValue::Time64Microsecond(Some(inner_value))
4516 | ScalarValue::Time64Nanosecond(Some(inner_value))
4517 | ScalarValue::TimestampNanosecond(Some(inner_value), _)
4518 | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
4519 | ScalarValue::TimestampMillisecond(Some(inner_value), _)
4520 | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
4521 _ => _internal_err!(
4522 "Cannot convert {:?} to {}",
4523 value,
4524 std::any::type_name::<Self>()
4525 ),
4526 }
4527 }
4528}
4529
4530impl TryFrom<ScalarValue> for i128 {
4532 type Error = DataFusionError;
4533
4534 fn try_from(value: ScalarValue) -> Result<Self> {
4535 match value {
4536 ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
4537 _ => _internal_err!(
4538 "Cannot convert {:?} to {}",
4539 value,
4540 std::any::type_name::<Self>()
4541 ),
4542 }
4543 }
4544}
4545
4546impl TryFrom<ScalarValue> for i256 {
4548 type Error = DataFusionError;
4549
4550 fn try_from(value: ScalarValue) -> Result<Self> {
4551 match value {
4552 ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
4553 _ => _internal_err!(
4554 "Cannot convert {:?} to {}",
4555 value,
4556 std::any::type_name::<Self>()
4557 ),
4558 }
4559 }
4560}
4561
4562impl_try_from!(UInt8, u8);
4563impl_try_from!(UInt16, u16);
4564impl_try_from!(UInt32, u32);
4565impl_try_from!(UInt64, u64);
4566impl_try_from!(Float32, f32);
4567impl_try_from!(Float64, f64);
4568impl_try_from!(Boolean, bool);
4569
4570impl TryFrom<DataType> for ScalarValue {
4571 type Error = DataFusionError;
4572
4573 fn try_from(datatype: DataType) -> Result<Self> {
4575 (&datatype).try_into()
4576 }
4577}
4578
4579impl TryFrom<&DataType> for ScalarValue {
4580 type Error = DataFusionError;
4581
4582 fn try_from(data_type: &DataType) -> Result<Self> {
4584 Self::try_new_null(data_type)
4585 }
4586}
4587
/// Writes an optional value to the formatter `$F`: the contained value via its
/// `Display` implementation when present, the literal text `NULL` otherwise.
/// Evaluates to the result of the underlying `write!` call.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
4596
4597impl fmt::Display for ScalarValue {
4603 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
4604 match self {
4605 ScalarValue::Decimal32(v, p, s) => {
4606 write!(f, "{v:?},{p:?},{s:?}")?;
4607 }
4608 ScalarValue::Decimal64(v, p, s) => {
4609 write!(f, "{v:?},{p:?},{s:?}")?;
4610 }
4611 ScalarValue::Decimal128(v, p, s) => {
4612 write!(f, "{v:?},{p:?},{s:?}")?;
4613 }
4614 ScalarValue::Decimal256(v, p, s) => {
4615 write!(f, "{v:?},{p:?},{s:?}")?;
4616 }
4617 ScalarValue::Boolean(e) => format_option!(f, e)?,
4618 ScalarValue::Float16(e) => format_option!(f, e)?,
4619 ScalarValue::Float32(e) => format_option!(f, e)?,
4620 ScalarValue::Float64(e) => format_option!(f, e)?,
4621 ScalarValue::Int8(e) => format_option!(f, e)?,
4622 ScalarValue::Int16(e) => format_option!(f, e)?,
4623 ScalarValue::Int32(e) => format_option!(f, e)?,
4624 ScalarValue::Int64(e) => format_option!(f, e)?,
4625 ScalarValue::UInt8(e) => format_option!(f, e)?,
4626 ScalarValue::UInt16(e) => format_option!(f, e)?,
4627 ScalarValue::UInt32(e) => format_option!(f, e)?,
4628 ScalarValue::UInt64(e) => format_option!(f, e)?,
4629 ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
4630 ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
4631 ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
4632 ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
4633 ScalarValue::Utf8(e)
4634 | ScalarValue::LargeUtf8(e)
4635 | ScalarValue::Utf8View(e) => format_option!(f, e)?,
4636 ScalarValue::Binary(e)
4637 | ScalarValue::FixedSizeBinary(_, e)
4638 | ScalarValue::LargeBinary(e)
4639 | ScalarValue::BinaryView(e) => match e {
4640 Some(bytes) => {
4641 for b in bytes.iter().take(10) {
4643 write!(f, "{b:02X}")?;
4644 }
4645 if bytes.len() > 10 {
4646 write!(f, "...")?;
4647 }
4648 }
4649 None => write!(f, "NULL")?,
4650 },
4651 ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4652 ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4653 ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
4654 ScalarValue::Date32(e) => format_option!(
4655 f,
4656 e.map(|v| {
4657 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4658 match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
4659 {
4660 Some(date) => date.to_string(),
4661 None => "".to_string(),
4662 }
4663 })
4664 )?,
4665 ScalarValue::Date64(e) => format_option!(
4666 f,
4667 e.map(|v| {
4668 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4669 match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
4670 {
4671 Some(date) => date.to_string(),
4672 None => "".to_string(),
4673 }
4674 })
4675 )?,
4676 ScalarValue::Time32Second(e) => format_option!(f, e)?,
4677 ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
4678 ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
4679 ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
4680 ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
4681 ScalarValue::IntervalMonthDayNano(e) => {
4682 format_option!(f, e.map(|v| format!("{v:?}")))?
4683 }
4684 ScalarValue::IntervalDayTime(e) => {
4685 format_option!(f, e.map(|v| format!("{v:?}")))?;
4686 }
4687 ScalarValue::DurationSecond(e) => format_option!(f, e)?,
4688 ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
4689 ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
4690 ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
4691 ScalarValue::Struct(struct_arr) => {
4692 assert_eq!(struct_arr.len(), 1);
4694
4695 if struct_arr.null_count() == struct_arr.len() {
4696 write!(f, "NULL")?;
4697 return Ok(());
4698 }
4699
4700 let columns = struct_arr.columns();
4701 let fields = struct_arr.fields();
4702 let nulls = struct_arr.nulls();
4703
4704 write!(
4705 f,
4706 "{{{}}}",
4707 columns
4708 .iter()
4709 .zip(fields.iter())
4710 .map(|(column, field)| {
4711 if nulls.is_some_and(|b| b.is_null(0)) {
4712 format!("{}:NULL", field.name())
4713 } else if let DataType::Struct(_) = field.data_type() {
4714 let sv = ScalarValue::Struct(Arc::new(
4715 column.as_struct().to_owned(),
4716 ));
4717 format!("{}:{sv}", field.name())
4718 } else {
4719 let sv = array_value_to_string(column, 0).unwrap();
4720 format!("{}:{sv}", field.name())
4721 }
4722 })
4723 .collect::<Vec<_>>()
4724 .join(",")
4725 )?
4726 }
4727 ScalarValue::Map(map_arr) => {
4728 if map_arr.null_count() == map_arr.len() {
4729 write!(f, "NULL")?;
4730 return Ok(());
4731 }
4732
4733 write!(
4734 f,
4735 "[{}]",
4736 map_arr
4737 .iter()
4738 .map(|struct_array| {
4739 if let Some(arr) = struct_array {
4740 let mut buffer = VecDeque::new();
4741 for i in 0..arr.len() {
4742 let key =
4743 array_value_to_string(arr.column(0), i).unwrap();
4744 let value =
4745 array_value_to_string(arr.column(1), i).unwrap();
4746 buffer.push_back(format!("{key}:{value}"));
4747 }
4748 format!(
4749 "{{{}}}",
4750 buffer
4751 .into_iter()
4752 .collect::<Vec<_>>()
4753 .join(",")
4754 .as_str()
4755 )
4756 } else {
4757 "NULL".to_string()
4758 }
4759 })
4760 .collect::<Vec<_>>()
4761 .join(",")
4762 )?
4763 }
4764 ScalarValue::Union(val, _fields, _mode) => match val {
4765 Some((id, val)) => write!(f, "{id}:{val}")?,
4766 None => write!(f, "NULL")?,
4767 },
4768 ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
4769 ScalarValue::Null => write!(f, "NULL")?,
4770 };
4771 Ok(())
4772 }
4773}
4774
4775fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
4776 assert_eq!(arr.len(), 1);
4778 let options = FormatOptions::default().with_display_error(true);
4779 let formatter =
4780 ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
4781 let value_formatter = formatter.value(0);
4782 write!(f, "{value_formatter}")
4783}
4784
/// Writes `data` as comma-separated decimal byte values (for example
/// `1,2,255`). Nothing is written for an empty slice.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    let Some((first, rest)) = data.split_first() else {
        return Ok(());
    };

    // The first byte carries no separator; every later byte is prefixed
    // with a comma.
    write!(f, "{first}")?;
    rest.iter().try_for_each(|byte| write!(f, ",{byte}"))
}
4796
impl fmt::Debug for ScalarValue {
    /// Writes the diagnostic form of this scalar: the variant name wrapped
    /// around a rendering of the value.
    ///
    /// Most variants embed their `Display` output (`{self}`); string, date,
    /// time, interval and duration variants additionally wrap it in double
    /// quotes. Binary variants list their bytes via [`fmt_binary`].
    ///
    /// # Panics
    ///
    /// Panics if a `Struct` scalar does not hold exactly one row, or if
    /// `array_value_to_string` fails for a struct / map entry.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // `VariantName(<Display output>)`
            ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
            ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
            // Timestamps also show the optional time zone in `Debug` form.
            ScalarValue::TimestampSecond(_, tz_opt) => {
                write!(f, "TimestampSecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
            }
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
            }
            // Strings: non-null values are quoted, nulls are not.
            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
            // Binary: non-null values list every byte in decimal, separated
            // by commas (unlike `Display`, which prints truncated hex).
            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
            ScalarValue::Binary(Some(b)) => {
                write!(f, "Binary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
            ScalarValue::BinaryView(Some(b)) => {
                write!(f, "BinaryView(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeBinary(size, None) => {
                write!(f, "FixedSizeBinary({size}, {self})")
            }
            ScalarValue::FixedSizeBinary(size, Some(b)) => {
                write!(f, "FixedSizeBinary({size}, \"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
            ScalarValue::LargeBinary(Some(b)) => {
                write!(f, "LargeBinary(\"")?;
                fmt_binary(b.as_slice(), f)?;
                write!(f, "\")")
            }
            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
            ScalarValue::List(_) => write!(f, "List({self})"),
            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
            ScalarValue::Struct(struct_arr) => {
                // A struct scalar is expected to wrap exactly one row.
                assert_eq!(struct_arr.len(), 1);

                let columns = struct_arr.columns();
                let fields = struct_arr.fields();

                // Written as `Struct({name:value,...})`; every field goes
                // through `array_value_to_string`, with no special handling
                // for a null struct row or for nested structs.
                write!(
                    f,
                    "Struct({{{}}})",
                    columns
                        .iter()
                        .zip(fields.iter())
                        .map(|(column, field)| {
                            let sv = array_value_to_string(column, 0).unwrap();
                            let name = field.name();
                            format!("{name}:{sv}")
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            ScalarValue::Map(map_arr) => {
                // Written as `Map([{"k":"v",...},NULL,...])`; keys and values
                // are strings rendered with `{:?}`, hence the quotes.
                write!(
                    f,
                    "Map([{}])",
                    map_arr
                        .iter()
                        .map(|struct_array| {
                            if let Some(arr) = struct_array {
                                let buffer: Vec<String> = (0..arr.len())
                                    .map(|i| {
                                        let key = array_value_to_string(arr.column(0), i)
                                            .unwrap();
                                        let value =
                                            array_value_to_string(arr.column(1), i)
                                                .unwrap();
                                        format!("{key:?}:{value:?}")
                                    })
                                    .collect();
                                format!("{{{}}}", buffer.join(","))
                            } else {
                                "NULL".to_string()
                            }
                        })
                        .collect::<Vec<_>>()
                        .join(",")
                )
            }
            // Temporal variants always quote the `Display` output, including
            // when the value is null (yielding `"NULL"`).
            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
            ScalarValue::Time32Millisecond(_) => {
                write!(f, "Time32Millisecond(\"{self}\")")
            }
            ScalarValue::Time64Microsecond(_) => {
                write!(f, "Time64Microsecond(\"{self}\")")
            }
            ScalarValue::Time64Nanosecond(_) => {
                write!(f, "Time64Nanosecond(\"{self}\")")
            }
            ScalarValue::IntervalDayTime(_) => {
                write!(f, "IntervalDayTime(\"{self}\")")
            }
            ScalarValue::IntervalYearMonth(_) => {
                write!(f, "IntervalYearMonth(\"{self}\")")
            }
            ScalarValue::IntervalMonthDayNano(_) => {
                write!(f, "IntervalMonthDayNano(\"{self}\")")
            }
            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
            ScalarValue::DurationMillisecond(_) => {
                write!(f, "DurationMillisecond(\"{self}\")")
            }
            ScalarValue::DurationMicrosecond(_) => {
                write!(f, "DurationMicrosecond(\"{self}\")")
            }
            ScalarValue::DurationNanosecond(_) => {
                write!(f, "DurationNanosecond(\"{self}\")")
            }
            // Unions: `Union type_id:value` (the inner value uses `Display`).
            ScalarValue::Union(val, _fields, _mode) => match val {
                Some((id, val)) => write!(f, "Union {id}:{val}"),
                None => write!(f, "Union(NULL)"),
            },
            // Dictionaries show both the key type and the wrapped value in
            // `Debug` form.
            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
            ScalarValue::Null => write!(f, "NULL"),
        }
    }
}
4952
/// Associates an arrow type with the [`ScalarValue`] variant used to hold a
/// single (optional) native value `T` of that type.
pub trait ScalarType<T: ArrowNativeType> {
    /// Wraps the optional native value `r` in a [`ScalarValue`]; the
    /// implementations in this file pass `None` through as a null scalar of
    /// the matching variant.
    fn scalar(r: Option<T>) -> ScalarValue;
}
4958
4959impl ScalarType<f32> for Float32Type {
4960 fn scalar(r: Option<f32>) -> ScalarValue {
4961 ScalarValue::Float32(r)
4962 }
4963}
4964
4965impl ScalarType<i64> for TimestampSecondType {
4966 fn scalar(r: Option<i64>) -> ScalarValue {
4967 ScalarValue::TimestampSecond(r, None)
4968 }
4969}
4970
4971impl ScalarType<i64> for TimestampMillisecondType {
4972 fn scalar(r: Option<i64>) -> ScalarValue {
4973 ScalarValue::TimestampMillisecond(r, None)
4974 }
4975}
4976
4977impl ScalarType<i64> for TimestampMicrosecondType {
4978 fn scalar(r: Option<i64>) -> ScalarValue {
4979 ScalarValue::TimestampMicrosecond(r, None)
4980 }
4981}
4982
4983impl ScalarType<i64> for TimestampNanosecondType {
4984 fn scalar(r: Option<i64>) -> ScalarValue {
4985 ScalarValue::TimestampNanosecond(r, None)
4986 }
4987}
4988
4989impl ScalarType<i32> for Date32Type {
4990 fn scalar(r: Option<i32>) -> ScalarValue {
4991 ScalarValue::Date32(r)
4992 }
4993}
4994
4995#[cfg(test)]
4996mod tests {
4997 use std::sync::Arc;
4998
4999 use super::*;
5000 use crate::cast::{as_list_array, as_map_array, as_struct_array};
5001 use crate::test_util::batches_to_string;
5002 use arrow::array::{
5003 FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
5004 NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
5005 StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
5006 };
5007 use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5008 use arrow::compute::{is_null, kernels};
5009 use arrow::datatypes::{
5010 ArrowNumericType, Fields, Float64Type, DECIMAL256_MAX_PRECISION,
5011 };
5012 use arrow::error::ArrowError;
5013 use arrow::util::pretty::pretty_format_columns;
5014 use chrono::NaiveDate;
5015 use insta::assert_snapshot;
5016 use rand::Rng;
5017
5018 #[test]
5019 fn test_scalar_value_from_for_map() {
5020 let string_builder = StringBuilder::new();
5021 let int_builder = Int32Builder::with_capacity(4);
5022 let mut builder = MapBuilder::new(None, string_builder, int_builder);
5023 builder.keys().append_value("joe");
5024 builder.values().append_value(1);
5025 builder.append(true).unwrap();
5026
5027 builder.keys().append_value("blogs");
5028 builder.values().append_value(2);
5029 builder.keys().append_value("foo");
5030 builder.values().append_value(4);
5031 builder.append(true).unwrap();
5032 builder.append(true).unwrap();
5033 builder.append(false).unwrap();
5034
5035 let expected = builder.finish();
5036
5037 let sv = ScalarValue::Map(Arc::new(expected.clone()));
5038 let map_arr = sv.to_array().unwrap();
5039 let actual = as_map_array(&map_arr).unwrap();
5040 assert_eq!(actual, &expected);
5041 }
5042
5043 #[test]
5044 fn test_scalar_value_from_for_struct() {
5045 let boolean = Arc::new(BooleanArray::from(vec![false]));
5046 let int = Arc::new(Int32Array::from(vec![42]));
5047
5048 let expected = StructArray::from(vec![
5049 (
5050 Arc::new(Field::new("b", DataType::Boolean, false)),
5051 Arc::clone(&boolean) as ArrayRef,
5052 ),
5053 (
5054 Arc::new(Field::new("c", DataType::Int32, false)),
5055 Arc::clone(&int) as ArrayRef,
5056 ),
5057 ]);
5058
5059 let sv = ScalarStructBuilder::new()
5060 .with_array(Field::new("b", DataType::Boolean, false), boolean)
5061 .with_array(Field::new("c", DataType::Int32, false), int)
5062 .build()
5063 .unwrap();
5064
5065 let struct_arr = sv.to_array().unwrap();
5066 let actual = as_struct_array(&struct_arr).unwrap();
5067 assert_eq!(actual, &expected);
5068 }
5069
    /// Building a struct scalar from four-row columns must fail; the test
    /// unwraps the builder error and expects the panic message below
    /// ("expected 1 got 4").
    #[test]
    #[should_panic(
        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
    )]
    fn test_scalar_value_from_for_struct_should_panic() {
        let _ = ScalarStructBuilder::new()
            .with_array(
                Field::new("bool", DataType::Boolean, false),
                Arc::new(BooleanArray::from(vec![false, true, false, false])),
            )
            .with_array(
                Field::new("i32", DataType::Int32, false),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
            )
            .build()
            .unwrap();
    }
5087
5088 #[test]
5089 fn test_to_array_of_size_for_nested() {
5090 let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
5092 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
5093
5094 let struct_array = StructArray::from(vec![
5095 (
5096 Arc::new(Field::new("b", DataType::Boolean, false)),
5097 Arc::clone(&boolean) as ArrayRef,
5098 ),
5099 (
5100 Arc::new(Field::new("c", DataType::Int32, false)),
5101 Arc::clone(&int) as ArrayRef,
5102 ),
5103 ]);
5104 let sv = ScalarValue::Struct(Arc::new(struct_array));
5105 let actual_arr = sv.to_array_of_size(2).unwrap();
5106
5107 let boolean = Arc::new(BooleanArray::from(vec![
5108 false, false, true, true, false, false, true, true,
5109 ]));
5110 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
5111
5112 let struct_array = StructArray::from(vec![
5113 (
5114 Arc::new(Field::new("b", DataType::Boolean, false)),
5115 Arc::clone(&boolean) as ArrayRef,
5116 ),
5117 (
5118 Arc::new(Field::new("c", DataType::Int32, false)),
5119 Arc::clone(&int) as ArrayRef,
5120 ),
5121 ]);
5122
5123 let actual = as_struct_array(&actual_arr).unwrap();
5124 assert_eq!(actual, &struct_array);
5125
5126 let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5128 Some(1),
5129 None,
5130 Some(2),
5131 ])]);
5132
5133 let sv = ScalarValue::List(Arc::new(arr));
5134 let actual_arr = sv
5135 .to_array_of_size(2)
5136 .expect("Failed to convert to array of size");
5137 let actual_list_arr = actual_arr.as_list::<i32>();
5138
5139 let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5140 Some(vec![Some(1), None, Some(2)]),
5141 Some(vec![Some(1), None, Some(2)]),
5142 ]);
5143
5144 assert_eq!(&arr, actual_list_arr);
5145 }
5146
5147 #[test]
5148 fn test_to_array_of_size_for_fsl() {
5149 let values = Int32Array::from_iter([Some(1), None, Some(2)]);
5150 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5151 let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
5152 let sv = ScalarValue::FixedSizeList(Arc::new(arr));
5153 let actual_arr = sv
5154 .to_array_of_size(2)
5155 .expect("Failed to convert to array of size");
5156
5157 let expected_values =
5158 Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
5159 let expected_arr =
5160 FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
5161
5162 assert_eq!(
5163 &expected_arr,
5164 as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
5165 );
5166
5167 let empty_array = sv
5168 .to_array_of_size(0)
5169 .expect("Failed to convert to empty array");
5170
5171 assert_eq!(empty_array.len(), 0);
5172 }
5173
5174 #[test]
5175 fn test_list_to_array_string() {
5176 let scalars = vec![
5177 ScalarValue::from("rust"),
5178 ScalarValue::from("arrow"),
5179 ScalarValue::from("data-fusion"),
5180 ];
5181
5182 let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
5183
5184 let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
5185 assert_eq!(*result, expected);
5186 }
5187
5188 fn single_row_list_array(items: Vec<&str>) -> ListArray {
5189 SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
5190 .build_list_array()
5191 }
5192
    /// Test helper: turns each optional row of `i64` values into a one-row
    /// list scalar — `ScalarValue::LargeList` when `O::IS_LARGE`, otherwise
    /// `ScalarValue::List`. A `None` row becomes a one-element null array of
    /// the corresponding list type.
    fn build_list<O: OffsetSizeTrait>(
        values: Vec<Option<Vec<Option<i64>>>>,
    ) -> Vec<ScalarValue> {
        values
            .into_iter()
            .map(|v| {
                // Build the backing one-row array first ...
                let arr = if v.is_some() {
                    Arc::new(
                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
                            vec![v],
                        ),
                    )
                } else if O::IS_LARGE {
                    new_null_array(
                        &DataType::LargeList(Arc::new(Field::new_list_field(
                            DataType::Int64,
                            true,
                        ))),
                        1,
                    )
                } else {
                    new_null_array(
                        &DataType::List(Arc::new(Field::new_list_field(
                            DataType::Int64,
                            true,
                        ))),
                        1,
                    )
                };

                // ... then wrap it in the scalar variant matching the offset
                // width.
                if O::IS_LARGE {
                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
                } else {
                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
                }
            })
            .collect()
    }
5231
5232 #[test]
5233 fn test_iter_to_array_fixed_size_list() {
5234 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5235 let f1 = Arc::new(FixedSizeListArray::new(
5236 Arc::clone(&field),
5237 3,
5238 Arc::new(Int32Array::from(vec![1, 2, 3])),
5239 None,
5240 ));
5241 let f2 = Arc::new(FixedSizeListArray::new(
5242 Arc::clone(&field),
5243 3,
5244 Arc::new(Int32Array::from(vec![4, 5, 6])),
5245 None,
5246 ));
5247 let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
5248
5249 let scalars = vec![
5250 ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
5251 ScalarValue::FixedSizeList(f1),
5252 ScalarValue::FixedSizeList(f2),
5253 ScalarValue::FixedSizeList(f_nulls),
5254 ];
5255
5256 let array = ScalarValue::iter_to_array(scalars).unwrap();
5257
5258 let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
5259 vec![
5260 None,
5261 Some(vec![Some(1), Some(2), Some(3)]),
5262 Some(vec![Some(4), Some(5), Some(6)]),
5263 None,
5264 ],
5265 3,
5266 );
5267 assert_eq!(array.as_ref(), &expected);
5268 }
5269
5270 #[test]
5271 fn test_iter_to_array_struct() {
5272 let s1 = StructArray::from(vec![
5273 (
5274 Arc::new(Field::new("A", DataType::Boolean, false)),
5275 Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5276 ),
5277 (
5278 Arc::new(Field::new("B", DataType::Int32, false)),
5279 Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5280 ),
5281 ]);
5282
5283 let s2 = StructArray::from(vec![
5284 (
5285 Arc::new(Field::new("A", DataType::Boolean, false)),
5286 Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5287 ),
5288 (
5289 Arc::new(Field::new("B", DataType::Int32, false)),
5290 Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5291 ),
5292 ]);
5293
5294 let scalars = vec![
5295 ScalarValue::Struct(Arc::new(s1)),
5296 ScalarValue::Struct(Arc::new(s2)),
5297 ];
5298
5299 let array = ScalarValue::iter_to_array(scalars).unwrap();
5300
5301 let expected = StructArray::from(vec![
5302 (
5303 Arc::new(Field::new("A", DataType::Boolean, false)),
5304 Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5305 ),
5306 (
5307 Arc::new(Field::new("B", DataType::Int32, false)),
5308 Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
5309 ),
5310 ]);
5311 assert_eq!(array.as_ref(), &expected);
5312 }
5313
5314 #[test]
5315 fn test_iter_to_array_struct_with_nulls() {
5316 let s1 = StructArray::from((
5318 vec![
5319 (
5320 Arc::new(Field::new("A", DataType::Int32, false)),
5321 Arc::new(Int32Array::from(vec![1])) as ArrayRef,
5322 ),
5323 (
5324 Arc::new(Field::new("B", DataType::Int64, false)),
5325 Arc::new(Int64Array::from(vec![2])) as ArrayRef,
5326 ),
5327 ],
5328 Buffer::from(&[1]),
5330 ));
5331
5332 let s2 = StructArray::from((
5334 vec![
5335 (
5336 Arc::new(Field::new("A", DataType::Int32, false)),
5337 Arc::new(Int32Array::from(vec![3])) as ArrayRef,
5338 ),
5339 (
5340 Arc::new(Field::new("B", DataType::Int64, false)),
5341 Arc::new(Int64Array::from(vec![4])) as ArrayRef,
5342 ),
5343 ],
5344 Buffer::from(&[0]),
5345 ));
5346
5347 let scalars = vec![
5348 ScalarValue::Struct(Arc::new(s1)),
5349 ScalarValue::Struct(Arc::new(s2)),
5350 ];
5351
5352 let array = ScalarValue::iter_to_array(scalars).unwrap();
5353 let struct_array = array.as_struct();
5354 assert!(struct_array.is_valid(0));
5355 assert!(struct_array.is_null(1));
5356 }
5357
5358 #[test]
5359 fn iter_to_array_primitive_test() {
5360 let scalars = build_list::<i32>(vec![
5362 Some(vec![Some(1), Some(2), Some(3)]),
5363 None,
5364 Some(vec![Some(4), Some(5)]),
5365 ]);
5366
5367 let array = ScalarValue::iter_to_array(scalars).unwrap();
5368 let list_array = as_list_array(&array).unwrap();
5369 let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5371 Some(vec![Some(1), Some(2), Some(3)]),
5372 None,
5373 Some(vec![Some(4), Some(5)]),
5374 ]);
5375 assert_eq!(list_array, &expected);
5376
5377 let scalars = build_list::<i64>(vec![
5378 Some(vec![Some(1), Some(2), Some(3)]),
5379 None,
5380 Some(vec![Some(4), Some(5)]),
5381 ]);
5382
5383 let array = ScalarValue::iter_to_array(scalars).unwrap();
5384 let list_array = as_large_list_array(&array).unwrap();
5385 let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5386 Some(vec![Some(1), Some(2), Some(3)]),
5387 None,
5388 Some(vec![Some(4), Some(5)]),
5389 ]);
5390 assert_eq!(list_array, &expected);
5391 }
5392
5393 #[test]
5394 fn iter_to_array_string_test() {
5395 let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
5396 let arr2 = single_row_list_array(vec!["rust", "world"]);
5397
5398 let scalars = vec![
5399 ScalarValue::List(Arc::new(arr1)),
5400 ScalarValue::List(Arc::new(arr2)),
5401 ];
5402
5403 let array = ScalarValue::iter_to_array(scalars).unwrap();
5404 let result = array.as_list::<i32>();
5405
5406 let string_builder = StringBuilder::with_capacity(5, 25);
5408 let mut list_of_string_builder = ListBuilder::new(string_builder);
5409
5410 list_of_string_builder.values().append_value("foo");
5411 list_of_string_builder.values().append_value("bar");
5412 list_of_string_builder.values().append_value("baz");
5413 list_of_string_builder.append(true);
5414
5415 list_of_string_builder.values().append_value("rust");
5416 list_of_string_builder.values().append_value("world");
5417 list_of_string_builder.append(true);
5418 let expected = list_of_string_builder.finish();
5419
5420 assert_eq!(result, &expected);
5421 }
5422
5423 #[test]
5424 fn test_list_scalar_eq_to_array() {
5425 let list_array: ArrayRef =
5426 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5427 Some(vec![Some(0), Some(1), Some(2)]),
5428 None,
5429 Some(vec![None, Some(5)]),
5430 ]));
5431
5432 let fsl_array: ArrayRef =
5433 Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5434 Some(vec![Some(0), Some(1), Some(2)]),
5435 None,
5436 Some(vec![Some(3), None, Some(5)]),
5437 ]));
5438
5439 for arr in [list_array, fsl_array] {
5440 for i in 0..arr.len() {
5441 let scalar =
5442 ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
5443 assert!(scalar.eq_array(&arr, i).unwrap());
5444 }
5445 }
5446 }
5447
5448 #[test]
5449 fn test_eq_array_err_message() {
5450 assert_starts_with(
5451 ScalarValue::Utf8(Some("123".to_string()))
5452 .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0)
5453 .unwrap_err()
5454 .message(),
5455 "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
5456 );
5457 }
5458
5459 #[test]
5460 fn scalar_add_trait_test() -> Result<()> {
5461 let float_value = ScalarValue::Float64(Some(123.));
5462 let float_value_2 = ScalarValue::Float64(Some(123.));
5463 assert_eq!(
5464 (float_value.add(&float_value_2))?,
5465 ScalarValue::Float64(Some(246.))
5466 );
5467 assert_eq!(
5468 (float_value.add(float_value_2))?,
5469 ScalarValue::Float64(Some(246.))
5470 );
5471 Ok(())
5472 }
5473
5474 #[test]
5475 fn scalar_sub_trait_test() -> Result<()> {
5476 let float_value = ScalarValue::Float64(Some(123.));
5477 let float_value_2 = ScalarValue::Float64(Some(123.));
5478 assert_eq!(
5479 float_value.sub(&float_value_2)?,
5480 ScalarValue::Float64(Some(0.))
5481 );
5482 assert_eq!(
5483 float_value.sub(float_value_2)?,
5484 ScalarValue::Float64(Some(0.))
5485 );
5486 Ok(())
5487 }
5488
5489 #[test]
5490 fn scalar_sub_trait_int32_test() -> Result<()> {
5491 let int_value = ScalarValue::Int32(Some(42));
5492 let int_value_2 = ScalarValue::Int32(Some(100));
5493 assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
5494 assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
5495 Ok(())
5496 }
5497
5498 #[test]
5499 fn scalar_sub_trait_int32_overflow_test() {
5500 let int_value = ScalarValue::Int32(Some(i32::MAX));
5501 let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
5502 let err = int_value
5503 .sub_checked(&int_value_2)
5504 .unwrap_err()
5505 .strip_backtrace();
5506 assert_eq!(
5507 err,
5508 "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
5509 )
5510 }
5511
5512 #[test]
5513 fn scalar_sub_trait_int64_test() -> Result<()> {
5514 let int_value = ScalarValue::Int64(Some(42));
5515 let int_value_2 = ScalarValue::Int64(Some(100));
5516 assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
5517 assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
5518 Ok(())
5519 }
5520
5521 #[test]
5522 fn scalar_sub_trait_int64_overflow_test() {
5523 let int_value = ScalarValue::Int64(Some(i64::MAX));
5524 let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
5525 let err = int_value
5526 .sub_checked(&int_value_2)
5527 .unwrap_err()
5528 .strip_backtrace();
5529 assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
5530 }
5531
5532 #[test]
5533 fn scalar_add_overflow_test() -> Result<()> {
5534 check_scalar_add_overflow::<Int8Type>(
5535 ScalarValue::Int8(Some(i8::MAX)),
5536 ScalarValue::Int8(Some(i8::MAX)),
5537 );
5538 check_scalar_add_overflow::<UInt8Type>(
5539 ScalarValue::UInt8(Some(u8::MAX)),
5540 ScalarValue::UInt8(Some(u8::MAX)),
5541 );
5542 check_scalar_add_overflow::<Int16Type>(
5543 ScalarValue::Int16(Some(i16::MAX)),
5544 ScalarValue::Int16(Some(i16::MAX)),
5545 );
5546 check_scalar_add_overflow::<UInt16Type>(
5547 ScalarValue::UInt16(Some(u16::MAX)),
5548 ScalarValue::UInt16(Some(u16::MAX)),
5549 );
5550 check_scalar_add_overflow::<Int32Type>(
5551 ScalarValue::Int32(Some(i32::MAX)),
5552 ScalarValue::Int32(Some(i32::MAX)),
5553 );
5554 check_scalar_add_overflow::<UInt32Type>(
5555 ScalarValue::UInt32(Some(u32::MAX)),
5556 ScalarValue::UInt32(Some(u32::MAX)),
5557 );
5558 check_scalar_add_overflow::<Int64Type>(
5559 ScalarValue::Int64(Some(i64::MAX)),
5560 ScalarValue::Int64(Some(i64::MAX)),
5561 );
5562 check_scalar_add_overflow::<UInt64Type>(
5563 ScalarValue::UInt64(Some(u64::MAX)),
5564 ScalarValue::UInt64(Some(u64::MAX)),
5565 );
5566
5567 Ok(())
5568 }
5569
5570 fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
5572 where
5573 T: ArrowNumericType,
5574 {
5575 let scalar_result = left.add_checked(&right);
5576
5577 let left_array = left.to_array().expect("Failed to convert to array");
5578 let right_array = right.to_array().expect("Failed to convert to array");
5579 let arrow_left_array = left_array.as_primitive::<T>();
5580 let arrow_right_array = right_array.as_primitive::<T>();
5581 let arrow_result = add(arrow_left_array, arrow_right_array);
5582
5583 assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
5584 }
5585
5586 #[test]
5587 fn test_interval_add_timestamp() -> Result<()> {
5588 let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
5589 months: 1,
5590 days: 2,
5591 nanoseconds: 3,
5592 }));
5593 let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5594 let result = interval.add(×tamp)?;
5595 let expect = timestamp.add(&interval)?;
5596 assert_eq!(result, expect);
5597
5598 let interval = ScalarValue::IntervalYearMonth(Some(123));
5599 let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5600 let result = interval.add(×tamp)?;
5601 let expect = timestamp.add(&interval)?;
5602 assert_eq!(result, expect);
5603
5604 let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
5605 days: 1,
5606 milliseconds: 23,
5607 }));
5608 let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
5609 let result = interval.add(×tamp)?;
5610 let expect = timestamp.add(&interval)?;
5611 assert_eq!(result, expect);
5612 Ok(())
5613 }
5614
5615 #[test]
5616 fn test_try_cmp() {
5617 assert_eq!(
5618 ScalarValue::try_cmp(
5619 &ScalarValue::Int32(Some(1)),
5620 &ScalarValue::Int32(Some(2))
5621 )
5622 .unwrap(),
5623 Ordering::Less
5624 );
5625 assert_eq!(
5626 ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
5627 .unwrap(),
5628 Ordering::Less
5629 );
5630 assert_starts_with(
5631 ScalarValue::try_cmp(
5632 &ScalarValue::Int32(Some(1)),
5633 &ScalarValue::Int64(Some(2)),
5634 )
5635 .unwrap_err()
5636 .message(),
5637 "Uncomparable values: Int32(1), Int64(2)",
5638 );
5639 }
5640
/// Exercises a `Decimal128` scalar end to end: type metadata, negation,
/// conversion to arrays, equality against array rows, ordering, and
/// building arrays from iterators of scalars.
#[test]
fn scalar_decimal_test() -> Result<()> {
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
    let decimal_type = DataType::Decimal128(10, 1);

    // Basic accessors and conversion back to the native value.
    assert_eq!(decimal_type, decimal_value.data_type());
    assert_eq!(123_i128, i128::try_from(decimal_value.clone()).unwrap());
    assert!(!decimal_value.is_null());

    // Negation must stay a Decimal128 and flip the sign of the raw value.
    let ScalarValue::Decimal128(negated, _, _) = decimal_value.arithmetic_negate()?
    else {
        unreachable!();
    };
    assert_eq!(-123, negated.unwrap());

    // Scalar -> single-element array.
    let single = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single_decimal = as_decimal128_array(&single)?;
    assert_eq!(1, single_decimal.len());
    assert_eq!(&decimal_type, single_decimal.data_type());
    assert_eq!(123i128, single_decimal.value(0));

    // Scalar -> array of a requested size; every row holds the same value.
    let repeated = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated_decimal = as_decimal128_array(&repeated)?;
    assert_eq!(10, repeated.len());
    assert_eq!(&decimal_type, repeated.data_type());
    for row in [0, 9] {
        assert_eq!(123i128, repeated_decimal.value(row));
    }
    for row in [1, 5] {
        assert!(decimal_value
            .eq_array(&repeated, row)
            .expect("Failed to compare arrays"));
    }

    // Array row -> scalar round trip, and the checked constructor.
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated, 5).unwrap()
    );
    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // Ordering between decimals of identical precision and scale.
    let smaller = ScalarValue::Decimal128(Some(123), 10, 2);
    let larger = ScalarValue::Decimal128(Some(124), 10, 2);
    assert!(smaller != larger);
    assert!(smaller < larger);
    assert!(smaller <= larger);

    // Decimals whose scale differs have no defined ordering.
    let other_scale = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, smaller.partial_cmp(&other_scale));

    // Iterator of non-null scalars -> array.
    let non_null =
        [1, 2, 3].map(|unscaled| ScalarValue::Decimal128(Some(unscaled), 10, 2));
    let array = ScalarValue::iter_to_array(non_null).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(&DataType::Decimal128(10, 2), array.data_type());

    // Iterator that ends with a null scalar -> array with a null last row.
    let with_null = [Some(1), Some(2), Some(3), None]
        .map(|unscaled| ScalarValue::Decimal128(unscaled, 10, 2));
    let array = ScalarValue::iter_to_array(with_null).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(&DataType::Decimal128(10, 2), array.data_type());

    for (row, unscaled) in [1, 2, 3].into_iter().enumerate() {
        let scalar = ScalarValue::try_new_decimal128(unscaled, 10, 2).unwrap();
        assert!(scalar
            .eq_array(&array, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
5746
/// `new_one` for `Decimal128` yields 10^scale as the unscaled value and
/// rejects unsupported precision/scale combinations.
#[test]
fn test_new_one_decimal128() {
    // (precision, scale, expected unscaled representation of 1)
    let valid: [(u8, i8, i128); 4] = [(5, 0, 1), (5, 1, 10), (5, 2, 100), (7, 2, 100)];
    for (precision, scale, unscaled) in valid {
        assert_eq!(
            ScalarValue::new_one(&DataType::Decimal128(precision, scale)).unwrap(),
            ScalarValue::Decimal128(Some(unscaled), precision, scale)
        );
    }

    // Negative scale, zero precision, and scale larger than precision.
    let invalid = [
        DataType::Decimal128(5, -1),
        DataType::Decimal128(0, 2),
        DataType::Decimal128(5, 7),
    ];
    for data_type in invalid {
        assert!(ScalarValue::new_one(&data_type).is_err());
    }
}
5772
/// `new_one` for `Decimal256` yields 10^scale as the unscaled value and
/// rejects unsupported precision/scale combinations.
#[test]
fn test_new_one_decimal256() {
    // (requested type, expected scalar)
    let valid = [
        (
            DataType::Decimal256(5, 0),
            ScalarValue::Decimal256(Some(1.into()), 5, 0),
        ),
        (
            DataType::Decimal256(5, 1),
            ScalarValue::Decimal256(Some(10.into()), 5, 1),
        ),
        (
            DataType::Decimal256(5, 2),
            ScalarValue::Decimal256(Some(100.into()), 5, 2),
        ),
        (
            DataType::Decimal256(7, 2),
            ScalarValue::Decimal256(Some(100.into()), 7, 2),
        ),
    ];
    for (data_type, expected) in valid {
        assert_eq!(ScalarValue::new_one(&data_type).unwrap(), expected);
    }

    // Negative scale, zero precision, and scale larger than precision.
    let invalid = [
        DataType::Decimal256(5, -1),
        DataType::Decimal256(0, 2),
        DataType::Decimal256(5, 7),
    ];
    for data_type in invalid {
        assert!(ScalarValue::new_one(&data_type).is_err());
    }
}
5798
/// `new_ten` for `Decimal128` yields 10 * 10^scale as the unscaled value and
/// rejects unsupported precision/scale combinations.
#[test]
fn test_new_ten_decimal128() {
    // (precision, scale, expected unscaled representation of 10)
    let valid: [(u8, i8, i128); 3] = [(5, 1, 100), (5, 2, 1000), (7, 2, 1000)];
    for (precision, scale, unscaled) in valid {
        assert_eq!(
            ScalarValue::new_ten(&DataType::Decimal128(precision, scale)).unwrap(),
            ScalarValue::Decimal128(Some(unscaled), precision, scale)
        );
    }

    // Negative scale, zero precision, and scale larger than precision.
    let invalid = [
        DataType::Decimal128(5, -1),
        DataType::Decimal128(0, 2),
        DataType::Decimal128(5, 7),
    ];
    for data_type in invalid {
        assert!(ScalarValue::new_ten(&data_type).is_err());
    }
}
5820
/// `new_ten` for `Decimal256` yields 10 * 10^scale as the unscaled value and
/// rejects unsupported precision/scale combinations.
#[test]
fn test_new_ten_decimal256() {
    // (requested type, expected scalar)
    let valid = [
        (
            DataType::Decimal256(5, 1),
            ScalarValue::Decimal256(Some(100.into()), 5, 1),
        ),
        (
            DataType::Decimal256(5, 2),
            ScalarValue::Decimal256(Some(1000.into()), 5, 2),
        ),
        (
            DataType::Decimal256(7, 2),
            ScalarValue::Decimal256(Some(1000.into()), 7, 2),
        ),
    ];
    for (data_type, expected) in valid {
        assert_eq!(ScalarValue::new_ten(&data_type).unwrap(), expected);
    }

    // Negative scale, zero precision, and scale larger than precision.
    let invalid = [
        DataType::Decimal256(5, -1),
        DataType::Decimal256(0, 2),
        DataType::Decimal256(5, 7),
    ];
    for data_type in invalid {
        assert!(ScalarValue::new_ten(&data_type).is_err());
    }
}
5842
/// `new_negative_one` for `Decimal128` yields -(10^scale) as the unscaled value.
#[test]
fn test_new_negative_one_decimal128() {
    // (precision, scale, expected unscaled representation of -1)
    let cases: [(u8, i8, i128); 2] = [(5, 0, -1), (5, 2, -100)];
    for (precision, scale, unscaled) in cases {
        let actual =
            ScalarValue::new_negative_one(&DataType::Decimal128(precision, scale))
                .unwrap();
        assert_eq!(actual, ScalarValue::Decimal128(Some(unscaled), precision, scale));
    }
}
5854
/// Checks `partial_cmp` on list-like scalars: element-wise ordering, lists of
/// different length, and lists whose first element is null.
#[test]
fn test_list_partial_cmp() {
    // Wraps `values` as the single row of a `List<Int64>` scalar.
    fn int64_list(values: Vec<Option<i64>>) -> ScalarValue {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(values)]),
        ))
    }

    // Wraps `values` as the single row of a `LargeList<Int64>` scalar.
    fn int64_large_list(values: Vec<Option<i64>>) -> ScalarValue {
        ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
            Int64Type,
            _,
            _,
        >(vec![Some(values)])))
    }

    // Wraps `values` as the single row of a `FixedSizeList<Int64>` scalar.
    fn int64_fixed_list(values: Vec<Option<i64>>, width: i32) -> ScalarValue {
        ScalarValue::FixedSizeList(Arc::new(
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
                vec![Some(values)],
                width,
            ),
        ))
    }

    // (left list, right list, expected ordering of left relative to right)
    let cases = [
        // Identical contents.
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Equal,
        ),
        // The first differing element decides, even if a later one disagrees.
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(1), Some(2), Some(30)],
            Ordering::Greater,
        ),
        // Equal prefix, last element smaller on the left.
        (
            vec![Some(10), Some(2), Some(3)],
            vec![Some(10), Some(2), Some(30)],
            Ordering::Less,
        ),
        // Longer left list, but its first element is smaller.
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(2), Some(3)],
            Ordering::Less,
        ),
        // Longer left list whose first element is larger.
        (
            vec![Some(2), Some(3), Some(4)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        // Right list is a strict prefix of the left list.
        (
            vec![Some(1), Some(2), Some(3)],
            vec![Some(1), Some(2)],
            Ordering::Greater,
        ),
        // A null first element compares greater than a non-null one.
        (
            vec![None, Some(2), Some(3)],
            vec![Some(1), Some(2), Some(3)],
            Ordering::Greater,
        ),
    ];
    for (lhs, rhs, expected) in cases {
        assert_eq!(
            int64_list(lhs).partial_cmp(&int64_list(rhs)),
            Some(expected)
        );
    }

    // The null-first case gives the same result for LargeList ...
    let a = int64_large_list(vec![None, Some(2), Some(3)]);
    let b = int64_large_list(vec![Some(1), Some(2), Some(3)]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    // ... and for FixedSizeList.
    let a = int64_fixed_list(vec![None, Some(2), Some(3)], 3);
    let b = int64_fixed_list(vec![Some(1), Some(2), Some(3)], 3);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
}
6014
/// A `UInt64` scalar converts to a one-row array, preserving value or null.
#[test]
fn scalar_value_to_array_u64() -> Result<()> {
    // Non-null scalar -> one valid row holding the value.
    let some_ref = ScalarValue::UInt64(Some(13u64))
        .to_array()
        .expect("Failed to convert to array");
    let some_values = as_uint64_array(&some_ref)?;
    assert_eq!(some_values.len(), 1);
    assert!(!some_values.is_null(0));
    assert_eq!(some_values.value(0), 13);

    // Null scalar -> one null row.
    let null_ref = ScalarValue::UInt64(None)
        .to_array()
        .expect("Failed to convert to array");
    let null_values = as_uint64_array(&null_ref)?;
    assert_eq!(null_values.len(), 1);
    assert!(null_values.is_null(0));

    Ok(())
}
6031
/// A `UInt32` scalar converts to a one-row array, preserving value or null.
#[test]
fn scalar_value_to_array_u32() -> Result<()> {
    // Non-null scalar -> one valid row holding the value.
    let some_ref = ScalarValue::UInt32(Some(13u32))
        .to_array()
        .expect("Failed to convert to array");
    let some_values = as_uint32_array(&some_ref)?;
    assert_eq!(some_values.len(), 1);
    assert!(!some_values.is_null(0));
    assert_eq!(some_values.value(0), 13);

    // Null scalar -> one null row.
    let null_ref = ScalarValue::UInt32(None)
        .to_array()
        .expect("Failed to convert to array");
    let null_values = as_uint32_array(&null_ref)?;
    assert_eq!(null_values.len(), 1);
    assert!(null_values.is_null(0));

    Ok(())
}
6048
/// Building a list from no scalars gives one list row with no child values.
#[test]
fn scalar_list_null_to_array() {
    let no_scalars: &[ScalarValue] = &[];
    let list = ScalarValue::new_list_nullable(no_scalars, &DataType::UInt64);

    let (rows, child_values) = (list.len(), list.values().len());
    assert_eq!(rows, 1);
    assert_eq!(child_values, 0);
}
6056
/// Building a large list from no scalars gives one list row with no child values.
#[test]
fn scalar_large_list_null_to_array() {
    let no_scalars: &[ScalarValue] = &[];
    let list = ScalarValue::new_large_list(no_scalars, &DataType::UInt64);

    let (rows, child_values) = (list.len(), list.values().len());
    assert_eq!(rows, 1);
    assert_eq!(child_values, 0);
}
6064
/// `new_list_nullable` packs the scalars into a single list row, keeping
/// values and nulls in order.
#[test]
fn scalar_list_to_array() -> Result<()> {
    let elements = [Some(100), None, Some(101)];
    let scalars = elements.map(ScalarValue::UInt64);

    let list = ScalarValue::new_list_nullable(&scalars, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // The only row holds the three inputs, with the null in the middle.
    let row_ref = list.value(0);
    let row = as_uint64_array(&row_ref)?;
    assert_eq!(row.len(), 3);
    assert_eq!(row.value(0), 100);
    assert!(row.is_null(1));
    assert_eq!(row.value(2), 101);

    Ok(())
}
6084
/// `new_large_list` packs the scalars into a single list row, keeping
/// values and nulls in order.
#[test]
fn scalar_large_list_to_array() -> Result<()> {
    let elements = [Some(100), None, Some(101)];
    let scalars = elements.map(ScalarValue::UInt64);

    let list = ScalarValue::new_large_list(&scalars, &DataType::UInt64);
    assert_eq!(list.len(), 1);
    assert_eq!(list.values().len(), 3);

    // The only row holds the three inputs, with the null in the middle.
    let row_ref = list.value(0);
    let row = as_uint64_array(&row_ref)?;
    assert_eq!(row.len(), 3);
    assert_eq!(row.value(0), 100);
    assert!(row.is_null(1));
    assert_eq!(row.value(2), 101);

    Ok(())
}
6104
/// Builds an array two ways and asserts both agree: once through
/// `ScalarValue::iter_to_array` over `ScalarValue::$variant` scalars, and once
/// directly with `$array_ty::from`. `$values` is expanded (and evaluated) twice.
macro_rules! check_scalar_iter {
    ($variant:ident, $array_ty:ident, $values:expr) => {{
        let scalars = $values
            .iter()
            .map(|item| ScalarValue::$variant(*item))
            .collect::<Vec<_>>();
        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($array_ty::from($values));

        assert_eq!(&array, &expected);
    }};
}
6118
/// Same check as `check_scalar_iter`, for scalar variants that take a second
/// argument (always passed as `None` here), such as the timestamp variants.
/// `$values` is expanded (and evaluated) twice.
macro_rules! check_scalar_iter_tz {
    ($variant:ident, $array_ty:ident, $values:expr) => {{
        let scalars = $values
            .iter()
            .map(|item| ScalarValue::$variant(*item, None))
            .collect::<Vec<_>>();
        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($array_ty::from($values));

        assert_eq!(&array, &expected);
    }};
}
6135
/// Same check as `check_scalar_iter`, for string-like variants: each input
/// element is converted with `to_string` before being wrapped in
/// `ScalarValue::$variant`. `$values` is expanded (and evaluated) twice.
macro_rules! check_scalar_iter_string {
    ($variant:ident, $array_ty:ident, $values:expr) => {{
        let scalars = $values
            .iter()
            .map(|item| ScalarValue::$variant(item.map(|text| text.to_string())))
            .collect::<Vec<_>>();
        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let expected: ArrayRef = Arc::new($array_ty::from($values));

        assert_eq!(&array, &expected);
    }};
}
6152
/// Same check as `check_scalar_iter`, for binary-like variants: each input
/// element is copied with `to_vec`, and the expected array is collected from
/// the same owned byte vectors. `$values` is expanded (and evaluated) twice.
macro_rules! check_scalar_iter_binary {
    ($variant:ident, $array_ty:ident, $values:expr) => {{
        let scalars = $values
            .iter()
            .map(|item| ScalarValue::$variant(item.map(|bytes| bytes.to_vec())))
            .collect::<Vec<_>>();
        let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();

        let owned_bytes: $array_ty = $values
            .iter()
            .map(|item| item.map(|bytes| bytes.to_vec()))
            .collect();
        let expected: ArrayRef = Arc::new(owned_bytes);

        assert_eq!(&array, &expected);
    }};
}
6172
/// Runs the `check_scalar_iter*` macros over boolean, float, integer,
/// timestamp, string and binary scalar variants.
#[test]
// The checker macros pass their input expression to `$ARRAYTYPE::from`, so the
// inputs are written as `vec![..]` expressions.
#[allow(clippy::useless_vec)]
fn scalar_iter_to_array_boolean() {
    // Invokes `$checker` once per `(scalar variant, array type)` pair, giving
    // each invocation its own copy of the `$input` expression.
    macro_rules! check_all {
        ($checker:ident, $input:expr, $(($variant:ident, $array:ident)),+ $(,)?) => {
            $( $checker!($variant, $array, $input); )+
        };
    }

    check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);

    check_all!(
        check_scalar_iter,
        vec![Some(1.9), None, Some(-2.1)],
        (Float32, Float32Array),
        (Float64, Float64Array),
    );

    check_all!(
        check_scalar_iter,
        vec![Some(1), None, Some(3)],
        (Int8, Int8Array),
        (Int16, Int16Array),
        (Int32, Int32Array),
        (Int64, Int64Array),
        (UInt8, UInt8Array),
        (UInt16, UInt16Array),
        (UInt32, UInt32Array),
        (UInt64, UInt64Array),
    );

    check_all!(
        check_scalar_iter_tz,
        vec![Some(1), None, Some(3)],
        (TimestampSecond, TimestampSecondArray),
        (TimestampMillisecond, TimestampMillisecondArray),
        (TimestampMicrosecond, TimestampMicrosecondArray),
        (TimestampNanosecond, TimestampNanosecondArray),
    );

    check_all!(
        check_scalar_iter_string,
        vec![Some("foo"), None, Some("bar")],
        (Utf8, StringArray),
        (LargeUtf8, LargeStringArray),
    );

    check_all!(
        check_scalar_iter_binary,
        vec![Some(b"foo"), None, Some(b"bar")],
        (Binary, BinaryArray),
        (LargeBinary, LargeBinaryArray),
    );
}
6233
/// `iter_to_array` must fail with a descriptive error when given no scalars.
#[test]
fn scalar_iter_to_array_empty() {
    let err = ScalarValue::iter_to_array(Vec::<ScalarValue>::new()).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("Empty iterator passed to ScalarValue::iter_to_array"),
        "{rendered}"
    );
}
6247
/// Dictionary scalars (Int32 keys, Utf8 values) convert to a dictionary array
/// whose keys resolve back to the original strings, with nulls preserved.
#[test]
fn scalar_iter_to_dictionary() {
    // Wraps an optional string as an Int32-keyed dictionary scalar.
    let dict_utf8 = |text: Option<&str>| {
        ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Utf8(text.map(String::from))),
        )
    };

    let scalars = [Some("Foo"), None, Some("Bar")].map(dict_utf8);

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let dict = as_dictionary_array::<Int32Type>(&array).unwrap();
    let strings = as_string_array(dict.values()).unwrap();

    // Resolve every key through the values array; a present key must point
    // at a valid (non-null) dictionary entry.
    let mut resolved = Vec::new();
    for key in dict.keys_iter() {
        resolved.push(key.map(|idx| {
            assert!(strings.is_valid(idx));
            strings.value(idx)
        }));
    }

    assert_eq!(resolved, vec![Some("Foo"), None, Some("Bar")]);
}
6279
/// `iter_to_array` must reject an iterator whose scalars differ in type, and
/// the error must name the expected type and the offending value.
#[test]
fn scalar_iter_to_array_mismatched_types() {
    // The first element fixes the expected type (Boolean); the second differs.
    let mixed = [
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Int32(Some(5)),
    ];

    let err = ScalarValue::iter_to_array(mixed).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
        "{rendered}"
    );
}
6290
/// `try_from_array` returns a typed null scalar for a null row and the value
/// for a non-null row.
#[test]
fn scalar_try_from_array_null() {
    let array: ArrayRef = Arc::new(Int64Array::from(vec![Some(33), None]));

    // Expected scalar for each row, in row order.
    let expected_rows = [ScalarValue::Int64(Some(33)), ScalarValue::Int64(None)];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        assert_eq!(expected, ScalarValue::try_from_array(&array, row).unwrap());
    }
}
6305
/// Scalars extracted from a list array report the list's data type, whether
/// the source row was null or not.
#[test]
fn scalar_try_from_array_list_array_null() {
    // Row 0 is a non-null list, row 1 is a null list.
    let rows = vec![Some(vec![Some(1), Some(2)]), None];
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Extract both rows before asserting anything.
    let extracted = [0, 1].map(|row| ScalarValue::try_from_array(&list, row).unwrap());

    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    for scalar in &extracted {
        assert_eq!(scalar.data_type(), expected_type);
    }
}
6322
/// Converting a list-like `DataType` into a `ScalarValue` yields a null scalar
/// backed by a one-row all-null array of that type.
#[test]
fn scalar_try_from_list_datatypes() {
    let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // List
    let list_type = DataType::List(Arc::clone(&inner_field));
    let scalar = ScalarValue::try_from(&list_type).unwrap();
    let nulls = new_null_array(&list_type, 1);
    let expected = ScalarValue::List(Arc::new(nulls.as_list::<i32>().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());

    // LargeList
    let large_list_type = DataType::LargeList(Arc::clone(&inner_field));
    let scalar = ScalarValue::try_from(&large_list_type).unwrap();
    let nulls = new_null_array(&large_list_type, 1);
    let expected = ScalarValue::LargeList(Arc::new(nulls.as_list::<i64>().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());

    // FixedSizeList (five elements per row)
    let fixed_list_type = DataType::FixedSizeList(Arc::clone(&inner_field), 5);
    let scalar = ScalarValue::try_from(&fixed_list_type).unwrap();
    let nulls = new_null_array(&fixed_list_type, 1);
    let expected =
        ScalarValue::FixedSizeList(Arc::new(nulls.as_fixed_size_list().clone()));
    assert_eq!(expected, scalar);
    assert!(expected.is_null());
}
6363
/// Converting a `List<List<Int32>>` data type into a `ScalarValue` yields a
/// `List` scalar backed by a one-row all-null array of that nested type.
#[test]
fn scalar_try_from_list_of_list() {
    // Wraps `inner` in a list type with a nullable element field.
    let list_of =
        |inner: DataType| DataType::List(Arc::new(Field::new_list_field(inner, true)));

    let nested_type = list_of(list_of(DataType::Int32));
    let scalar = ScalarValue::try_from(&nested_type).unwrap();

    // Build the expectation from an independently constructed, equal type.
    let expected_type = list_of(list_of(DataType::Int32));
    let nulls = new_null_array(&expected_type, 1);
    let expected = ScalarValue::List(Arc::new(nulls.as_list::<i32>().clone()));

    assert_eq!(expected, scalar);
}
6391
/// Null scalars created from `List<Int32>` and `List<List<Int32>>` must not
/// compare equal to each other.
#[test]
fn scalar_try_from_not_equal_list_nested_list() {
    // Wraps `inner` in a list type with a nullable element field.
    let list_of =
        |inner: DataType| DataType::List(Arc::new(Field::new_list_field(inner, true)));

    let flat_type = list_of(DataType::Int32);
    let flat_scalar = ScalarValue::try_from(&flat_type).unwrap();

    let nested_type = list_of(list_of(DataType::Int32));
    let nested_scalar = ScalarValue::try_from(&nested_type).unwrap();

    assert_ne!(flat_scalar, nested_scalar);
}
6408
/// Converting a `Dictionary(Int8, Utf8)` data type into a `ScalarValue` yields
/// a dictionary scalar with the same key type and a null `Utf8` value.
#[test]
fn scalar_try_from_dict_datatype() {
    let dict_type =
        DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
    let actual = ScalarValue::try_from(&dict_type).unwrap();

    let expected = ScalarValue::Dictionary(
        Box::new(DataType::Int8),
        Box::new(ScalarValue::Utf8(None)),
    );
    assert_eq!(expected, actual);
}
6420
/// Pins the in-memory size of `ScalarValue` at 64 bytes, so a change that
/// makes the enum larger or smaller is noticed and reviewed.
#[test]
fn size_of_scalar() {
    let actual_bytes = size_of::<ScalarValue>();
    assert_eq!(actual_bytes, 64);
}
6435
/// Checks the memory accounting of `size`, `size_of_vec` and `size_of_hashset`
/// for a binary scalar that owns a 10-byte-capacity buffer.
#[test]
fn memory_size() {
    // One scalar: the enum itself plus the capacity of its byte buffer.
    let scalar = ScalarValue::Binary(Some(Vec::with_capacity(10)));
    assert_eq!(scalar.size(), size_of::<ScalarValue>() + 10,);
    let scalar_size = scalar.size();

    // Vec with capacity 10 and one element: the Vec header, nine unused
    // slots, and the full size of the stored scalar.
    let mut scalars = Vec::with_capacity(10);
    // The scalar is moved in (not cloned) so the same buffer is measured.
    scalars.push(scalar);
    assert_eq!(scalars.capacity(), 10);
    let expected_vec_size =
        size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + scalar_size;
    assert_eq!(ScalarValue::size_of_vec(&scalars), expected_vec_size,);

    // HashSet holding the same scalar, again moved rather than cloned.
    let mut set = HashSet::with_capacity(0);
    set.insert(scalars.pop().unwrap());
    // Read the capacity after the insert; the set was created with capacity 0.
    let set_capacity = set.capacity();
    let expected_set_size = size_of::<HashSet<ScalarValue>>()
        + ((set_capacity - 1) * size_of::<ScalarValue>())
        + scalar_size;
    assert_eq!(ScalarValue::size_of_hashset(&set), expected_set_size,);
}
6463
/// For many array types, checks that `eq_array` is true for a scalar at its
/// own row and false at every other row of the same array.
#[test]
fn scalar_eq_array() {
    // Casts every `Some` element of `$source` to `$target`, keeping `None`s.
    macro_rules! make_typed_vec {
        ($source:expr, $target:ident) => {{
            $source
                .iter()
                .map(|item| item.map(|raw| raw as $target))
                .collect::<Vec<_>>()
        }};
    }

    // Input values; each set has three distinct entries, one of them null.
    let bool_vals = [Some(true), None, Some(false)];
    let f32_vals = [Some(-1.0), None, Some(1.0)];
    let f64_vals = make_typed_vec!(f32_vals, f64);

    let i8_vals = [Some(-1), None, Some(1)];
    let i16_vals = make_typed_vec!(i8_vals, i16);
    let i32_vals = make_typed_vec!(i8_vals, i32);
    let i64_vals = make_typed_vec!(i8_vals, i64);

    let u8_vals = [Some(0), None, Some(1)];
    let u16_vals = make_typed_vec!(u8_vals, u16);
    let u32_vals = make_typed_vec!(u8_vals, u32);
    let u64_vals = make_typed_vec!(u8_vals, u64);

    let str_vals = [Some("foo"), None, Some("bar")];

    let interval_dt_vals = [
        Some(IntervalDayTime::MINUS_ONE),
        None,
        Some(IntervalDayTime::ONE),
    ];
    let interval_mdn_vals = [
        Some(IntervalMonthDayNano::MINUS_ONE),
        None,
        Some(IntervalMonthDayNano::ONE),
    ];

    // An array plus the scalar expected at each of its rows (same order).
    // The check below requires the rows to be pairwise different.
    #[derive(Debug)]
    struct TestCase {
        array: ArrayRef,
        scalars: Vec<ScalarValue>,
    }

    // Builds a `TestCase` from primitive-like values.
    macro_rules! make_test_case {
        // Scalar variants that take only the value.
        ($values:expr, $array_ty:ident, $variant:ident) => {{
            TestCase {
                array: Arc::new($values.iter().collect::<$array_ty>()),
                scalars: $values
                    .iter()
                    .map(|item| ScalarValue::$variant(*item))
                    .collect(),
            }
        }};

        // Scalar variants that take the value and a second argument
        // (the timestamp variants, called with `None` or `Some("UTC")` below).
        ($values:expr, $array_ty:ident, $variant:ident, $zone:expr) => {{
            let zone = $zone;
            TestCase {
                array: Arc::new($values.iter().collect::<$array_ty>()),
                scalars: $values
                    .iter()
                    .map(|item| ScalarValue::$variant(*item, zone.clone()))
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for string arrays from `Option<&str>` values.
    macro_rules! make_str_test_case {
        ($values:expr, $array_ty:ident, $variant:ident) => {{
            TestCase {
                array: Arc::new($values.iter().cloned().collect::<$array_ty>()),
                scalars: $values
                    .iter()
                    .map(|item| ScalarValue::$variant(item.map(|text| text.to_string())))
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for binary arrays from `Option<&str>` values,
    // using the UTF-8 bytes of each string.
    macro_rules! make_binary_test_case {
        ($values:expr, $array_ty:ident, $variant:ident) => {{
            TestCase {
                array: Arc::new($values.iter().cloned().collect::<$array_ty>()),
                scalars: $values
                    .iter()
                    .map(|item| {
                        ScalarValue::$variant(item.map(|text| text.as_bytes().to_vec()))
                    })
                    .collect(),
            }
        }};
    }

    // Builds a `TestCase` for a string dictionary array keyed by `$key_ty`.
    macro_rules! make_str_dict_test_case {
        ($values:expr, $key_ty:ident) => {{
            TestCase {
                array: Arc::new(
                    $values
                        .iter()
                        .cloned()
                        .collect::<DictionaryArray<$key_ty>>(),
                ),
                scalars: $values
                    .iter()
                    .map(|item| {
                        ScalarValue::Dictionary(
                            Box::new($key_ty::DATA_TYPE),
                            Box::new(ScalarValue::Utf8(item.map(|text| text.to_string()))),
                        )
                    })
                    .collect(),
            }
        }};
    }

    let cases = vec![
        make_test_case!(bool_vals, BooleanArray, Boolean),
        make_test_case!(f32_vals, Float32Array, Float32),
        make_test_case!(f64_vals, Float64Array, Float64),
        make_test_case!(i8_vals, Int8Array, Int8),
        make_test_case!(i16_vals, Int16Array, Int16),
        make_test_case!(i32_vals, Int32Array, Int32),
        make_test_case!(i64_vals, Int64Array, Int64),
        make_test_case!(u8_vals, UInt8Array, UInt8),
        make_test_case!(u16_vals, UInt16Array, UInt16),
        make_test_case!(u32_vals, UInt32Array, UInt32),
        make_test_case!(u64_vals, UInt64Array, UInt64),
        make_str_test_case!(str_vals, StringArray, Utf8),
        make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
        make_binary_test_case!(str_vals, BinaryArray, Binary),
        make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
        make_test_case!(i32_vals, Date32Array, Date32),
        make_test_case!(i64_vals, Date64Array, Date64),
        make_test_case!(i32_vals, Time32SecondArray, Time32Second),
        make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
        make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
        make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
        make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
        make_test_case!(
            i64_vals,
            TimestampSecondArray,
            TimestampSecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMillisecondArray,
            TimestampMillisecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampMicrosecondArray,
            TimestampMicrosecond,
            Some("UTC".into())
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            None
        ),
        make_test_case!(
            i64_vals,
            TimestampNanosecondArray,
            TimestampNanosecond,
            Some("UTC".into())
        ),
        make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
        make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
        make_test_case!(
            interval_mdn_vals,
            IntervalMonthDayNanoArray,
            IntervalMonthDayNano
        ),
        make_str_dict_test_case!(str_vals, Int8Type),
        make_str_dict_test_case!(str_vals, Int16Type),
        make_str_dict_test_case!(str_vals, Int32Type),
        make_str_dict_test_case!(str_vals, Int64Type),
        make_str_dict_test_case!(str_vals, UInt8Type),
        make_str_dict_test_case!(str_vals, UInt16Type),
        make_str_dict_test_case!(str_vals, UInt32Type),
        make_str_dict_test_case!(str_vals, UInt64Type),
    ];

    for TestCase { array, scalars } in cases {
        println!("**** Test Case *****");
        println!("Input array type: {}", array.data_type());
        println!("Input scalars: {scalars:#?}");
        assert_eq!(array.len(), scalars.len());

        for (index, scalar) in scalars.into_iter().enumerate() {
            // Compares this scalar against one row of the case's array.
            let equals_row = |row: usize| {
                scalar
                    .eq_array(&array, row)
                    .expect("Failed to compare arrays")
            };

            assert!(
                equals_row(index),
                "Expected {scalar:?} to be equal to {array:?} at index {index}"
            );

            // Every other row must compare as not equal.
            for other_index in (0..array.len()).filter(|&row| row != index) {
                assert!(
                    !equals_row(other_index),
                    "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
                );
            }
        }
    }
}
6691
/// Checks `partial_cmp` for integers of the same and of different widths, and
/// for struct scalars with matching and non-matching field names.
#[test]
fn scalar_partial_ordering() {
    // Same variant: ordered by value.
    let same_variant = [
        (33, 0, Ordering::Greater),
        (0, 33, Ordering::Less),
        (33, 33, Ordering::Equal),
    ];
    for (lhs, rhs, expected) in same_variant {
        assert_eq!(
            ScalarValue::Int64(Some(lhs)).partial_cmp(&ScalarValue::Int64(Some(rhs))),
            Some(expected)
        );
    }

    // Different variants are not comparable, in either direction.
    let narrow = ScalarValue::Int32(Some(33));
    let wide = ScalarValue::Int64(Some(33));
    assert_eq!(wide.partial_cmp(&narrow), None);
    assert_eq!(narrow.partial_cmp(&wide), None);

    // Structs with the same field names: here the first field (1.0 vs 2.0)
    // already differs, and the result is Less.
    let lhs = ScalarValue::from(vec![
        ("A", ScalarValue::from(1.0)),
        ("B", ScalarValue::from("Z")),
    ]);
    let same_names = ScalarValue::from(vec![
        ("A", ScalarValue::from(2.0)),
        ("B", ScalarValue::from("A")),
    ]);
    assert_eq!(lhs.partial_cmp(&same_names), Some(Ordering::Less));

    // Structs whose field names differ (here only by case) are not comparable.
    let other_names = ScalarValue::from(vec![
        ("a", ScalarValue::from(2.0)),
        ("b", ScalarValue::from("A")),
    ]);
    assert_eq!(lhs.partial_cmp(&other_names), None);
}
6737
/// `&str`, `String` and `FromStr` conversions all produce the same `Utf8` scalar.
#[test]
fn test_scalar_value_from_string() {
    let expected = ScalarValue::Utf8(Some("foo".to_string()));

    let conversions = [
        ScalarValue::from("foo"),
        ScalarValue::from("foo".to_string()),
        ScalarValue::from_str("foo").unwrap(),
    ];
    for converted in conversions {
        assert_eq!(converted, expected);
    }
}
6747
/// Exercises `ScalarValue::Struct`: expanding a struct scalar to an array,
/// reading it back from an array, building a null struct from a data type,
/// building it from `(name, value)` pairs, and `iter_to_array` over several
/// struct scalars.
#[test]
fn test_scalar_struct() {
    // Columns A, B, C plus a nested struct column D { e, f }; all non-nullable.
    let col_a = Arc::new(Field::new("A", DataType::Int32, false));
    let col_b = Arc::new(Field::new("B", DataType::Boolean, false));
    let col_c = Arc::new(Field::new("C", DataType::Utf8, false));

    let col_e = Arc::new(Field::new("e", DataType::Int16, false));
    let col_f = Arc::new(Field::new("f", DataType::Int64, false));
    let nested_type =
        DataType::Struct(vec![Arc::clone(&col_e), Arc::clone(&col_f)].into());
    let col_d = Arc::new(Field::new("D", nested_type, false));

    // Builds a struct array with the layout above from one vector per leaf column.
    let build_rows =
        |a: Vec<i32>, b: Vec<bool>, c: Vec<&str>, e: Vec<i16>, f: Vec<i64>| {
            let nested = StructArray::from(vec![
                (Arc::clone(&col_e), Arc::new(Int16Array::from(e)) as ArrayRef),
                (Arc::clone(&col_f), Arc::new(Int64Array::from(f)) as ArrayRef),
            ]);
            StructArray::from(vec![
                (Arc::clone(&col_a), Arc::new(Int32Array::from(a)) as ArrayRef),
                (
                    Arc::clone(&col_b),
                    Arc::new(BooleanArray::from(b)) as ArrayRef,
                ),
                (Arc::clone(&col_c), Arc::new(StringArray::from(c)) as ArrayRef),
                (Arc::clone(&col_d), Arc::new(nested) as ArrayRef),
            ])
        };

    // Builds the equivalent struct scalar from plain values.
    let row_scalar = |a: i32, b: bool, c: &str, e: i16, f: i64| {
        ScalarValue::from(vec![
            ("A", ScalarValue::from(a)),
            ("B", ScalarValue::from(b)),
            ("C", ScalarValue::from(c)),
            (
                "D",
                ScalarValue::from(vec![
                    ("e", ScalarValue::from(e)),
                    ("f", ScalarValue::from(f)),
                ]),
            ),
        ])
    };

    let single_row =
        build_rows(vec![23], vec![false], vec!["Hello"], vec![2], vec![3]);
    let scalar = ScalarValue::Struct(Arc::new(single_row));

    // Expanding the scalar to two rows repeats every leaf value.
    let array = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");
    let expected = Arc::new(build_rows(
        vec![23, 23],
        vec![false, false],
        vec!["Hello", "Hello"],
        vec![2, 2],
        vec![3, 3],
    )) as ArrayRef;
    assert_eq!(&array, &expected);

    // Reading row 1 back gives the scalar we started from.
    let from_array = ScalarValue::try_from_array(&expected, 1).unwrap();
    assert_eq!(from_array, scalar);

    // A scalar created from only the data type is null.
    let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
    assert!(none_scalar.is_null());
    assert_eq!(format!("{none_scalar:?}"), "Struct({A:,B:,C:,D:})");

    // The `(name, value)` constructor produces the same scalar.
    let from_pairs = row_scalar(23, false, "Hello", 2, 3);
    assert_eq!(from_pairs, scalar);

    // Several struct scalars combine into one multi-row struct array.
    let scalars = vec![
        row_scalar(23, false, "Hello", 2, 3),
        row_scalar(7, true, "World", 4, 5),
        row_scalar(-1000, true, "!!!!!", 6, 7),
    ];
    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let expected = Arc::new(build_rows(
        vec![23, 7, -1000],
        vec![false, true, true],
        vec!["Hello", "World", "!!!!!"],
        vec![2, 4, 6],
        vec![3, 5, 7],
    )) as ArrayRef;
    assert_eq!(&array, &expected);
}
6922
/// Feeds one array of each covered type through
/// `round_trip_through_scalar`, which checks every row individually.
#[test]
fn round_trip() {
    // Dictionary-encoded strings: "foo", null, "bar".
    let dictionary = {
        let mut dict = StringDictionaryBuilder::<Int32Type>::new();
        dict.append("foo").unwrap();
        dict.append_null();
        dict.append("bar").unwrap();
        dict.finish()
    };

    // Dense union with an Int32 entry "a" and a Float64 entry "b".
    let dense_union = {
        let mut union = UnionBuilder::new_dense();
        union.append::<Int32Type>("a", 1).unwrap();
        union.append::<Float64Type>("b", 3.4).unwrap();
        union.build().unwrap()
    };

    // Sparse union with the same two entries.
    let sparse_union = {
        let mut union = UnionBuilder::new_sparse();
        union.append::<Int32Type>("a", 1).unwrap();
        union.append::<Float64Type>("b", 3.4).unwrap();
        union.build().unwrap()
    };

    // List rows: ["A", "B"], an empty list, then a null row that still has a
    // child value ("?") appended before it is marked invalid.
    let list = {
        let mut lists = ListBuilder::new(StringBuilder::new());
        lists.values().append_value("A");
        lists.values().append_value("B");
        lists.append(true);
        lists.append(true);
        lists.values().append_value("?");
        lists.append(false);
        lists.finish()
    };

    // LargeList rows: ["A", "B"], an empty list, null.
    let large_list = {
        let mut lists = LargeListBuilder::new(StringBuilder::new());
        lists.values().append_value("A");
        lists.values().append_value("B");
        lists.append(true);
        lists.append(true);
        lists.append(false);
        lists.finish()
    };

    // FixedSizeList(3) rows: [0, 1, 2], null, [3, null, 5].
    let fixed_size_list = {
        let mut lists = FixedSizeListBuilder::new(Int32Builder::new(), 3);

        lists.values().append_value(0);
        lists.values().append_value(1);
        lists.values().append_value(2);
        lists.append(true);

        lists.values().append_null();
        lists.values().append_null();
        lists.values().append_null();
        lists.append(false);

        lists.values().append_value(3);
        lists.values().append_null();
        lists.values().append_value(5);
        lists.append(true);

        lists.finish()
    };

    // Map rows: {"joe": 1}, an empty map, null.
    let map = {
        let keys = StringBuilder::new();
        let values = Int32Builder::with_capacity(4);

        let mut maps = MapBuilder::new(None, keys, values);
        maps.keys().append_value("joe");
        maps.values().append_value(1);
        maps.append(true).unwrap();
        maps.append(true).unwrap();
        maps.append(false).unwrap();

        maps.finish()
    };

    let cases: Vec<ArrayRef> = vec![
        // Integers.
        Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
        // Booleans and floats.
        Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
        Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
        Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
        // Strings.
        Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
        Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
        Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
        Arc::new(dictionary),
        // Binary.
        Arc::new(BinaryArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        Arc::new(LargeBinaryArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        Arc::new(BinaryViewArray::from_iter(vec![
            Some(b"foo"),
            None,
            Some(b"bar"),
        ])),
        // Timestamps without a timezone.
        Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(TimestampMillisecondArray::from(vec![
            Some(1),
            None,
            Some(3),
        ])),
        Arc::new(TimestampMicrosecondArray::from(vec![
            Some(1),
            None,
            Some(3),
        ])),
        Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
        // Timestamps with a "UTC" timezone.
        Arc::new(
            TimestampSecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        Arc::new(
            TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
                .with_timezone_opt(Some("UTC")),
        ),
        // Dates and times.
        Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
        Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
        // Null array.
        Arc::new(NullArray::new(3)),
        // Unions and nested types.
        Arc::new(dense_union),
        Arc::new(sparse_union),
        Arc::new(list),
        Arc::new(large_list),
        Arc::new(fixed_size_list),
        Arc::new(map),
    ];

    cases.into_iter().for_each(round_trip_through_scalar);
}
7095
7096 fn round_trip_through_scalar(arr: ArrayRef) {
7101 for i in 0..arr.len() {
7102 let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
7104 let array = scalar.to_array_of_size(1).unwrap();
7105 assert_eq!(array.len(), 1);
7106 assert_eq!(array.data_type(), arr.data_type());
7107 assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
7108 }
7109 }
7110
/// Reads every slot of a sparse union array as a `ScalarValue::Union` and
/// checks equality, `eq_array`, and null-ness for slots whose child is null.
#[test]
fn test_scalar_union_sparse() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child has six slots; only one slot per child holds a value.
    let children: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![
            Some(42),
            None,
            None,
            None,
            None,
            None,
        ])),
        Arc::new(BooleanArray::from(vec![
            None,
            Some(true),
            None,
            None,
            None,
            None,
        ])),
        Arc::new(StringArray::from(vec![
            None,
            None,
            Some("foo"),
            None,
            None,
            None,
        ])),
    ];

    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let union_array = UnionArray::try_new(fields.clone(), type_ids, None, children)
        .expect("UnionArray");
    let array: ArrayRef = Arc::new(union_array);

    // (type id, child value) expected at each slot.
    let expected_slots = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (ti, value)) in expected_slots.into_iter().enumerate() {
        let is_null = value.is_null();
        let expected = ScalarValue::Union(
            Some((ti, Box::new(value))),
            fields.clone(),
            UnionMode::Sparse,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // Only slots whose child value is null must read back as null.
        assert!(!is_null || actual.is_null(), "[{i}] {actual} was not null");
    }
}
7166
/// Reads every slot of a dense union array as a `ScalarValue::Union` and
/// checks equality, `eq_array`, and null-ness for slots whose child is null.
#[test]
fn test_scalar_union_dense() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child holds one value followed by one null.
    let children: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![Some(42), None])),
        Arc::new(BooleanArray::from(vec![Some(true), None])),
        Arc::new(StringArray::from(vec![Some("foo"), None])),
    ];

    // Slots 0..3 use offset 0 of their child, slots 3..6 use offset 1.
    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
    let union_array =
        UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
            .expect("UnionArray");
    let array: ArrayRef = Arc::new(union_array);

    // (type id, child value) expected at each slot.
    let expected_slots = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (ti, value)) in expected_slots.into_iter().enumerate() {
        let is_null = value.is_null();
        let expected = ScalarValue::Union(
            Some((ti, Box::new(value))),
            fields.clone(),
            UnionMode::Dense,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // Only slots whose child value is null must read back as null.
        assert!(!is_null || actual.is_null(), "[{i}] {actual} was not null");
    }
}
7216
/// Converts struct scalars that contain a list field, and list scalars whose
/// elements are such structs, into arrays with `ScalarValue::iter_to_array`,
/// comparing each result with an array assembled by hand.
#[test]
fn test_lists_in_struct() {
    /// Appends one `{A, primitive_list}` struct element to the struct builder
    /// nested inside `builder`, without closing the enclosing list row.
    fn push_struct(
        builder: &mut ListBuilder<StructBuilder>,
        label: &str,
        numbers: &[i32],
    ) {
        let element = builder.values();
        element
            .field_builder::<StringBuilder>(0)
            .unwrap()
            .append_value(label);

        let list = element
            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
            .unwrap();
        for number in numbers {
            list.values().append_value(*number);
        }
        list.append(true);

        element.append(true);
    }

    let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
    let field_primitive_list = Arc::new(Field::new(
        "primitive_list",
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
        false,
    ));

    // Single-row Int32 list scalars.
    let int_list = |values: Vec<Option<i32>>| {
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(values)]),
        ))
    };
    let l0 = int_list(vec![Some(1), Some(2), Some(3)]);
    let l1 = int_list(vec![Some(4), Some(5)]);
    let l2 = int_list(vec![Some(6)]);

    // Struct scalars pairing a label with one of the lists above.
    let struct_scalar = |label: &str, list: ScalarValue| {
        ScalarValue::from(vec![
            ("A", ScalarValue::from(label)),
            ("primitive_list", list),
        ])
    };
    let s0 = struct_scalar("First", l0);
    let s1 = struct_scalar("Second", l1);
    let s2 = struct_scalar("Third", l2);

    // Part 1: struct scalars -> struct array.
    let array =
        ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
    let array = as_struct_array(&array).unwrap();

    let expected = StructArray::from(vec![
        (
            Arc::clone(&field_a),
            Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
        ),
        (
            Arc::clone(&field_primitive_list),
            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
                Some(vec![Some(1), Some(2), Some(3)]),
                Some(vec![Some(4), Some(5)]),
                Some(vec![Some(6)]),
            ])) as ArrayRef,
        ),
    ]);
    assert_eq!(array, &expected);

    // Part 2: list-of-struct scalars -> list array.
    let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
    let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();

    let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
    let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();

    let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
    let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();

    let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
    let array = array.as_list::<i32>();

    // Assemble the expected list-of-struct array with builders.
    let label_builder = StringBuilder::with_capacity(4, 1024);
    let numbers_builder = ListBuilder::new(Int32Array::builder(8));
    let element_builder = StructBuilder::new(
        vec![field_a, field_primitive_list],
        vec![Box::new(label_builder), Box::new(numbers_builder)],
    );
    let mut list_builder = ListBuilder::new(element_builder);

    // Row 0: [{First, [1, 2, 3]}, {Second, [4, 5]}]
    push_struct(&mut list_builder, "First", &[1, 2, 3]);
    push_struct(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    // Row 1: [{Third, [6]}]
    push_struct(&mut list_builder, "Third", &[6]);
    list_builder.append(true);

    // Row 2: [{Second, [4, 5]}]
    push_struct(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    let expected = list_builder.finish();

    assert_eq!(array, &expected);
}
7419
7420 fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
7421 let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
7422 ListArray::new(
7423 Arc::new(Field::new_list_field(
7424 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7425 true,
7426 )),
7427 OffsetBuffer::<i32>::from_lengths([1]),
7428 Arc::new(a1),
7429 None,
7430 )
7431 }
7432
/// `iter_to_array` over three single-row `List<List<Int32>>` scalars must
/// equal a three-row nested list assembled with builders.
#[test]
fn test_nested_lists() {
    let scalars = vec![
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(1), Some(2), Some(3)]))),
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(4), Some(5)]))),
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(6)]))),
    ];
    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let array = array.as_list::<i32>();

    // Expected: [[1, 2, 3]], [[4, 5]], [[6]]
    let mut outer_builder = ListBuilder::new(ListBuilder::new(Int32Array::builder(6)));
    for row in [vec![1, 2, 3], vec![4, 5], vec![6]] {
        for value in row {
            outer_builder.values().values().append_value(value);
        }
        // Close the inner list, then the outer row containing it.
        outer_builder.values().append(true);
        outer_builder.append(true);
    }
    let expected = outer_builder.finish();

    assert_eq!(array, &expected);
}
7472
/// A nanosecond timestamp scalar with a "UTC" timezone keeps that timezone
/// in its own data type, in the array it converts to, and in a scalar read
/// back from that array.
#[test]
fn scalar_timestamp_ns_utc_timezone() {
    let utc_nanos = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));

    let scalar = ScalarValue::TimestampNanosecond(
        Some(1599566400000000000),
        Some("UTC".into()),
    );
    assert_eq!(scalar.data_type(), utc_nanos);

    let array = scalar.to_array().expect("Failed to convert to array");
    assert_eq!(array.len(), 1);
    assert_eq!(array.data_type(), &utc_nanos);

    let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(new_scalar.data_type(), utc_nanos);
}
7498
/// Runs `check_scalar_cast` over a table of (scalar, target type) pairs
/// covering primitive, dictionary, string-view and list-like casts.
#[test]
fn cast_round_trip() {
    let dictionary_type = || {
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
    };
    let int32_element = || Arc::new(Field::new("element", DataType::Int32, true));
    let int64_element = || Arc::new(Field::new("element", DataType::Int64, true));

    // List<Int32> holding [1] followed by an empty row.
    let list_scalar = {
        let mut builder =
            ListBuilder::new(Int32Builder::new()).with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::List(Arc::new(builder.finish()))
    };

    // FixedSizeList<Int32, 1> holding [1].
    let fixed_size_list_scalar = {
        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
            .with_field(int32_element());
        builder.values().append_value(1);
        builder.append(true);
        ScalarValue::FixedSizeList(Arc::new(builder.finish()))
    };

    // LargeList<Int32> holding [1] followed by an empty row.
    let large_list_scalar = {
        let mut builder =
            LargeListBuilder::new(Int32Builder::new()).with_field(int32_element());
        builder.append_value([Some(1)]);
        builder.append(true);
        ScalarValue::LargeList(Arc::new(builder.finish()))
    };

    let cases = vec![
        (ScalarValue::Int8(Some(5)), DataType::Int16),
        (ScalarValue::Int8(None), DataType::Int16),
        (ScalarValue::Float64(Some(5.5)), DataType::Int16),
        (ScalarValue::Float64(None), DataType::Int16),
        (ScalarValue::from("foo"), dictionary_type()),
        (ScalarValue::Utf8(None), dictionary_type()),
        (ScalarValue::Utf8(None), DataType::Utf8View),
        (ScalarValue::from("foo"), DataType::Utf8View),
        (
            ScalarValue::from("larger than 12 bytes string"),
            DataType::Utf8View,
        ),
        (list_scalar, DataType::List(int64_element())),
        (
            fixed_size_list_scalar,
            DataType::FixedSizeList(int64_element(), 1),
        ),
        (large_list_scalar, DataType::LargeList(int64_element())),
    ];

    for (scalar, desired_type) in cases {
        check_scalar_cast(scalar, desired_type);
    }
}
7570
7571 fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
7573 let scalar_array = scalar.to_array().expect("Failed to convert to array");
7575 let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
7577
7578 let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
7580 assert_eq!(cast_scalar.data_type(), desired_type);
7581
7582 let array = cast_scalar
7584 .to_array_of_size(10)
7585 .expect("Failed to convert to array of size");
7586
7587 assert_eq!(array.data_type(), &desired_type)
7589 }
7590
/// `arithmetic_negate` flips the sign of an `Int32`, keeps a null `Int32`
/// null, and returns an error for `UInt8` and `Boolean` scalars.
#[test]
fn test_scalar_negative() -> Result<()> {
    // Signed values negate; nulls pass through unchanged.
    let negated = ScalarValue::Int32(Some(12)).arithmetic_negate()?;
    assert_eq!(ScalarValue::Int32(Some(-12)), negated);

    let negated_null = ScalarValue::Int32(None).arithmetic_negate()?;
    assert_eq!(ScalarValue::Int32(None), negated_null);

    // These inputs are rejected.
    let unsigned = ScalarValue::UInt8(Some(12));
    assert!(unsigned.arithmetic_negate().is_err());

    let boolean = ScalarValue::Boolean(None);
    assert!(boolean.arithmetic_negate().is_err());

    Ok(())
}
7606
/// Negating the minimum value of signed integer, decimal, interval and
/// timestamp scalars must fail with an Arrow arithmetic-overflow error,
/// while negating the extreme `f32`/`f64` values must succeed.
///
/// # Errors
///
/// Returns the underlying error if a negation fails with anything other
/// than an arithmetic overflow, or if a float negation fails.
#[test]
// NOTE(review): presumably kept so the compiler does not reject the
// deliberately extreme inputs below; confirm it is still required.
#[allow(arithmetic_overflow)]
fn test_scalar_negative_overflows() -> Result<()> {
    macro_rules! test_overflow_on_value {
        ($($val:expr),* $(,)?) => {$(
            {
                let value: ScalarValue = $val;
                // `expect_err` takes a plain `&str` and does not interpolate
                // `{value:?}`, so the offending input is formatted explicitly
                // to make it show up in the failure message.
                let err = match value.arithmetic_negate() {
                    Ok(negated) => panic!(
                        "Should receive overflow error on negating {:?}, but got {:?}",
                        value, negated
                    ),
                    Err(err) => err,
                };
                // Only an Arrow arithmetic overflow at the root of the error
                // chain counts as success; any other error is returned.
                let root_err = err.find_root();
                match root_err {
                    DataFusionError::ArrowError(err, _)
                        if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
                    _ => return Err(err),
                };
            }
        )*};
    }
    test_overflow_on_value!(
        // Signed integers.
        i8::MIN.into(),
        i16::MIN.into(),
        i32::MIN.into(),
        i64::MIN.into(),
        // Decimals.
        ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
        ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
        // Intervals: each component is tried at its minimum in turn.
        ScalarValue::IntervalYearMonth(Some(i32::MIN)),
        ScalarValue::new_interval_dt(i32::MIN, 999),
        ScalarValue::new_interval_dt(1, i32::MIN),
        ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
        ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
        ScalarValue::new_interval_mdn(12, 15, i64::MIN),
        // Timestamps.
        ScalarValue::TimestampSecond(Some(i64::MIN), None),
        ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
        ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
        ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
    );

    // Float MIN and MAX are each other's negation, so no overflow is expected.
    let float_cases = [
        (
            ScalarValue::Float16(Some(f16::MIN)),
            ScalarValue::Float16(Some(f16::MAX)),
        ),
        (
            ScalarValue::Float16(Some(f16::MAX)),
            ScalarValue::Float16(Some(f16::MIN)),
        ),
        (f32::MIN.into(), f32::MAX.into()),
        (f32::MAX.into(), f32::MIN.into()),
        (f64::MIN.into(), f64::MAX.into()),
        (f64::MAX.into(), f64::MIN.into()),
    ];
    // The two leading Float16 pairs are skipped by this loop; the
    // `f16_test_overflow` test asserts the same expectations for them.
    for (test, expected) in float_cases.into_iter().skip(2) {
        assert_eq!(test.arithmetic_negate()?, expected);
    }
    Ok(())
}
7666
/// Negating `f16::MIN` yields `f16::MAX` and vice versa.
#[test]
fn f16_test_overflow() {
    let lowest = ScalarValue::Float16(Some(f16::MIN));
    let highest = ScalarValue::Float16(Some(f16::MAX));

    assert_eq!(lowest.arithmetic_negate().unwrap(), highest);
    assert_eq!(highest.arithmetic_negate().unwrap(), lowest);
}
7685
// Generates a test named `$TEST_NAME` that applies the binary method
// `$FUNCTION` to `UInt64(12)` and `Int32(-3)` and requires the call to fail
// with an error whose message contains `$EXPECTED_ERROR`.
macro_rules! expect_operation_error {
    ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
        #[test]
        fn $TEST_NAME() {
            let lhs = ScalarValue::UInt64(Some(12));
            let rhs = ScalarValue::Int32(Some(-3));

            // Success is the failure case here.
            let Err(e) = lhs.$FUNCTION(&rhs) else {
                panic!(
                    "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                    lhs, rhs
                );
            };

            let error_message = e.to_string();
            assert!(
                error_message.contains($EXPECTED_ERROR),
                "Expected error '{}' not found in actual error '{}'",
                $EXPECTED_ERROR,
                error_message
            );
        }
    };
}

// Adding mismatched integer types is rejected.
expect_operation_error!(
    expect_add_error,
    add,
    "Invalid arithmetic operation: UInt64 + Int32"
);
// Subtracting mismatched integer types is rejected.
expect_operation_error!(
    expect_sub_error,
    sub,
    "Invalid arithmetic operation: UInt64 - Int32"
);
7723
// Expands each nine-element row
// `[lhs value, lhs precision, lhs scale, rhs value, rhs precision, rhs scale,
//   expected value, expected precision, expected scale]`
// into a check that applying `$operation` to the two `Decimal128` operands
// produces the expected `Decimal128` scalar.
macro_rules! decimal_op_test_cases {
    ($operation:ident, [$([$lhs_value:expr, $lhs_precision:expr, $lhs_scale:expr, $rhs_value:expr, $rhs_precision:expr, $rhs_scale:expr, $out_value:expr, $out_precision:expr, $out_scale:expr]),+]) => {
        $(
            let lhs = ScalarValue::Decimal128($lhs_value, $lhs_precision, $lhs_scale);
            let rhs = ScalarValue::Decimal128($rhs_value, $rhs_precision, $rhs_scale);
            let actual = lhs.$operation(&rhs).unwrap();
            let expected = ScalarValue::Decimal128($out_value, $out_precision, $out_scale);
            assert_eq!(expected, actual);
        )+
    };
}
7736
/// Adds `Decimal128` scalars with matching and differing scales and checks
/// the resulting value, precision and scale.
#[test]
fn decimal_operations() {
    // Both operands already share scale 2.
    let same_scale_sum: i128 = 123 + 124;
    // The right operand (scale 2) is multiplied by 10 to reach scale 3.
    let rhs_rescaled_sum: i128 = 123 + 124 * 10_i128.pow(1);
    // The left operand (scale 2) is multiplied by 10^(3 - 2) to reach scale 3.
    let lhs_rescaled_sum: i128 = 123 * 10_i128.pow(3 - 2) + 124;

    decimal_op_test_cases!(
        add,
        [
            [Some(123), 10, 2, Some(124), 10, 2, Some(same_scale_sum), 11, 2],
            [Some(123), 10, 3, Some(124), 10, 2, Some(rhs_rescaled_sum), 12, 3],
            [Some(123), 10, 2, Some(124), 11, 3, Some(lhs_rescaled_sum), 12, 3]
        ]
    );
}
7770
/// Adding a null `Decimal128` to a non-null one (in either position) gives a
/// null result that still carries the expected precision and scale.
#[test]
fn decimal_operations_with_nulls() {
    let absent: Option<i128> = None;
    let present: Option<i128> = Some(123);

    decimal_op_test_cases!(
        add,
        [
            // Same precision and scale on both sides.
            [absent, 10, 2, present, 10, 2, absent, 11, 2],
            [present, 10, 2, absent, 10, 2, absent, 11, 2],
            // Right side has the larger scale.
            [present, 8, 2, absent, 10, 3, absent, 11, 3],
            [absent, 8, 2, present, 10, 3, absent, 11, 3],
            // One side has scale 4, the other scale 3.
            [present, 8, 4, absent, 10, 3, absent, 12, 4],
            [absent, 10, 3, present, 8, 4, absent, 12, 4]
        ]
    );
}
7791
/// Checks `ScalarValue::distance` for pairs of same-typed integer, float and
/// decimal scalars against the expected distance.
#[test]
fn test_scalar_distance() {
    // Builds `(lhs, rhs, expected distance)` for two values of one variant.
    macro_rules! case {
        ($variant:ident, $lhs:expr, $rhs:expr => $distance:expr) => {
            (
                ScalarValue::$variant(Some($lhs)),
                ScalarValue::$variant(Some($rhs)),
                $distance,
            )
        };
    }

    let cases = [
        // Signed integers, in both argument orders.
        case!(Int8, 1, 2 => 1),
        case!(Int8, 2, 1 => 1),
        case!(Int16, -5, 5 => 10),
        case!(Int16, 5, -5 => 10),
        case!(Int32, 0, 0 => 0),
        case!(Int32, -5, -10 => 5),
        case!(Int64, -10, -5 => 5),
        // Unsigned integers.
        case!(UInt8, 1, 2 => 1),
        case!(UInt8, 0, 0 => 0),
        case!(UInt16, 5, 10 => 5),
        case!(UInt32, 10, 5 => 5),
        case!(UInt64, 5, 10 => 5),
        // Half-precision floats with fractional differences.
        case!(Float16, f16::from_f32(1.1), f16::from_f32(1.9) => 1),
        case!(Float16, f16::from_f32(-5.3), f16::from_f32(-9.2) => 4),
        case!(Float16, f16::from_f32(-5.3), f16::from_f32(-9.7) => 4),
        // Floats with whole-number differences.
        case!(Float32, 1.0, 2.0 => 1),
        case!(Float32, 2.0, 1.0 => 1),
        case!(Float64, 0.0, 0.0 => 0),
        case!(Float64, -5.0, -10.0 => 5),
        case!(Float64, -10.0, -5.0 => 5),
        // Floats with fractional differences; the expected values show how
        // the fractional part is mapped onto an integer distance.
        case!(Float32, 1.2, 1.3 => 0),
        case!(Float32, 1.1, 1.9 => 1),
        case!(Float64, -5.3, -9.2 => 4),
        case!(Float64, -5.3, -9.7 => 4),
        case!(Float64, -5.3, -9.9 => 5),
        // Decimals with identical precision and scale.
        (
            ScalarValue::Decimal128(Some(10), 1, 0),
            ScalarValue::Decimal128(Some(5), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal128(Some(5), 1, 0),
            ScalarValue::Decimal128(Some(10), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal256(Some(10.into()), 1, 0),
            ScalarValue::Decimal256(Some(5.into()), 1, 0),
            5,
        ),
        (
            ScalarValue::Decimal256(Some(5.into()), 1, 0),
            ScalarValue::Decimal256(Some(10.into()), 1, 0),
            5,
        ),
    ];

    for (lhs, rhs, expected) in &cases {
        let distance = lhs.distance(rhs).unwrap();
        assert_eq!(distance, *expected);
    }
}
7931
/// `distance` returns `None` for decimal pairs made of the type's maximum
/// value and its negation.
#[test]
fn test_distance_none() {
    let extreme_pairs = [
        (
            ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
            ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0),
        ),
        (
            ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
            ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0),
        ),
    ];

    for (lhs, rhs) in &extreme_pairs {
        let distance = lhs.distance(rhs);
        assert!(distance.is_none(), "{lhs} vs {rhs}");
    }
}
7949
#[test]
fn test_scalar_distance_invalid() {
    // Every pair below is expected to make `distance` return `None`.
    let cases = [
        // One or both operands are NULL.
        (ScalarValue::Int8(None), ScalarValue::Int8(None)),
        (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
        // Operands are different `ScalarValue` variants.
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
        (
            ScalarValue::Float16(Some(f16::from_f32(1.0))),
            ScalarValue::Float32(Some(1.0)),
        ),
        (
            ScalarValue::Float16(Some(f16::from_f32(1.0))),
            ScalarValue::Int32(Some(1)),
        ),
        (
            ScalarValue::Float64(Some(1.1)),
            ScalarValue::Float32(Some(2.2)),
        ),
        (
            ScalarValue::UInt64(Some(777)),
            ScalarValue::Int32(Some(111)),
        ),
        // Different variants where one side is also NULL.
        (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
        // Same variant on both sides, but string / boolean / date operands.
        (ScalarValue::from("foo"), ScalarValue::from("bar")),
        (
            ScalarValue::Boolean(Some(true)),
            ScalarValue::Boolean(Some(false)),
        ),
        (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
        (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
        // Decimals whose scale or precision differ between the operands.
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 5, 3),
        ),
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 3, 5),
        ),
        (
            ScalarValue::Decimal256(Some(123.into()), 5, 5),
            ScalarValue::Decimal256(Some(120.into()), 3, 5),
        ),
        // Decimal256 operands of opposite sign with only the high 128 bits set.
        (
            ScalarValue::Decimal256(
                Some(i256::from_parts(0, 2_i64.pow(50).into())),
                1,
                0,
            ),
            ScalarValue::Decimal256(
                Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
                1,
                0,
            ),
        ),
        (
            ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
            ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
        ),
    ];
    for (lhs, rhs) in cases {
        let distance = lhs.distance(&rhs);
        // Name the offending pair on failure; Debug output keeps the variant
        // visible even when the value is NULL.
        assert!(
            distance.is_none(),
            "expected no distance for {lhs:?} vs {rhs:?}, got {distance:?}"
        );
    }
}
8025
#[test]
fn test_scalar_interval_negate() {
    // (input, expected negation) -- one pair per interval unit.
    let pairs = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(-1, -12),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(-1, -999),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(-12, -15, -123_456),
        ),
    ];

    for (expr, negated) in &pairs {
        let actual = expr.arithmetic_negate().unwrap();
        assert_eq!(*negated, actual, "-expr:{expr:?}");
    }
}
8047
#[test]
fn test_scalar_interval_add() {
    // (lhs, rhs, expected sum) -- one triple per interval unit.
    let triples = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(2, 24),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(2, 1998),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(24, 30, 246_912),
        ),
    ];

    for (lhs, rhs, sum) in &triples {
        // Evaluate in both operand orders: the sum must not depend on it.
        let forward = lhs.add(rhs).unwrap();
        let swapped = rhs.add(lhs).unwrap();
        assert_eq!(*sum, forward, "lhs:{lhs:?} + rhs:{rhs:?}");
        assert_eq!(*sum, swapped, "lhs:{rhs:?} + rhs:{lhs:?}");
    }
}
8074
#[test]
fn test_scalar_interval_sub() {
    // Subtracting an interval from an equal interval must give the zero
    // interval of the same unit.
    let triples = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(0, 0),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(0, 0),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(0, 0, 0),
        ),
    ];

    for (lhs, rhs, difference) in &triples {
        let actual = lhs.sub(rhs).unwrap();
        assert_eq!(*difference, actual, "lhs:{lhs:?} - rhs:{rhs:?}");
    }
}
8099
#[test]
fn timestamp_op_random_tests() {
    // Round-trip property: applying an interval to a timestamp and then
    // applying the inverse operation must give back the original timestamp.
    let sample_size = 1000;
    let timestamps = get_random_timestamps(sample_size);
    let intervals = get_random_intervals(sample_size);
    // `zip` would silently stop at the shorter input, so pin the lengths first.
    assert_eq!(timestamps.len(), intervals.len());

    for (idx, (ts, interval)) in timestamps.iter().zip(&intervals).enumerate() {
        // Alternate the order of operations: add-then-subtract on even
        // indices, subtract-then-add on odd ones. `add`/`sub` accept borrowed
        // operands, so no clones are needed.
        let back = if idx % 2 == 0 {
            ts.add(interval).unwrap().sub(interval).unwrap()
        } else {
            ts.sub(interval).unwrap().add(interval).unwrap()
        };
        assert_eq!(ts, &back);
    }
}
8122
#[test]
fn test_struct_nulls() {
    // Checks that struct-level validity survives conversion of struct scalars
    // to arrays, both via `iter_to_array` and via per-scalar `to_array` + concat.

    // Schema: { a: UInt64, b: { ba: UInt64, bb: UInt64 } }, all nullable.
    let fields_b = Fields::from(vec![
        Field::new("ba", DataType::UInt64, true),
        Field::new("bb", DataType::UInt64, true),
    ]);
    let fields = Fields::from(vec![
        Field::new("a", DataType::UInt64, true),
        Field::new("b", DataType::Struct(fields_b.clone()), true),
    ]);

    // Children for a single row {a: 1, b: {ba: 2, bb: 3}} with no nulls.
    let struct_value = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from(vec![
                (
                    Arc::clone(&fields_b[0]),
                    Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                ),
                (
                    Arc::clone(&fields_b[1]),
                    Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                ),
            ])) as ArrayRef,
        ),
    ];

    // Same children, but the nested struct `b` carries a validity buffer with
    // its only bit cleared, i.e. `b` is NULL for the single row.
    let struct_value_with_nulls = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from((
                vec![
                    (
                        Arc::clone(&fields_b[0]),
                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                    ),
                    (
                        Arc::clone(&fields_b[1]),
                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                    ),
                ],
                Buffer::from(&[0]),
            ))) as ArrayRef,
        ),
    ];

    // Three scalars, distinguished by the top-level validity buffer:
    //   1. bit cleared -> the whole struct is NULL
    //   2. bit set     -> valid struct whose `b` field is NULL
    //   3. bit set     -> fully valid struct
    let scalars = vec![
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[0]),
        )))),
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value_with_nulls.clone(),
            Buffer::from(&[1]),
        )))),
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[1]),
        )))),
    ];

    // Shared assertions: only the first row is NULL at the top level, and the
    // pretty-printed column renders NULLs as empty values.
    let check_array = |array| {
        let is_null = is_null(&array).unwrap();
        assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));

        let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
        let formatted = formatted.split('\n').collect::<Vec<_>>();
        let expected = vec![
            "+---------------------------+",
            "| col |",
            "+---------------------------+",
            "| |",
            "| {a: 1, b: } |",
            "| {a: 1, b: {ba: 2, bb: 3}} |",
            "+---------------------------+",
        ];
        assert_eq!(
            formatted, expected,
            "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
        );
    };

    // Path 1: build one array directly from the iterator of scalars.
    let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
    check_array(array);

    // Path 2: convert each scalar to its own array, then concatenate.
    let arrays = scalars
        .iter()
        .map(ScalarValue::to_array)
        .collect::<Result<Vec<_>>>()
        .expect("Failed to convert to array");
    let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
    let array = arrow::compute::concat(&arrays).unwrap();
    check_array(array);
}
8230
#[test]
fn test_struct_display() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Utf8, true);

    // Struct scalar with one populated field (a = 1) and one NULL field (b).
    let s = ScalarStructBuilder::new()
        .with_scalar(field_a, ScalarValue::from(1i32))
        .with_scalar(field_b, ScalarValue::Utf8(None))
        .build()
        .unwrap();

    // Display and Debug renderings; the NULL field shows up as an empty value.
    assert_eq!(s.to_string(), "{a:1,b:}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // Also pin how the underlying array renders through the record-batch
    // pretty printer.
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +-------------+
    | s |
    +-------------+
    | {a: 1, b: } |
    +-------------+
    ");
}
8259
#[test]
fn test_null_bug() {
    // Formats a struct scalar built from a `StructArray` whose validity comes
    // from a `NullBufferBuilder` containing a single non-null entry.
    // NOTE(review): the name suggests a regression test for a past NULL-handling
    // bug; the original issue is not visible from here.
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Int32, true);
    let fields = Fields::from(vec![field_a, field_b]);

    // One row: a = 1, b = 2.
    let array_a = Arc::new(Int32Array::from_iter_values([1]));
    let array_b = Arc::new(Int32Array::from_iter_values([2]));
    let arrays: Vec<ArrayRef> = vec![array_a, array_b];

    let mut not_nulls = NullBufferBuilder::new(1);

    // Mark the single row as valid.
    not_nulls.append_non_null();

    let ar = StructArray::new(fields, arrays, not_nulls.finish());
    let s = ScalarValue::Struct(Arc::new(ar));

    // Both fields must be rendered with their values.
    assert_eq!(s.to_string(), "{a:1,b:2}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // Same check through the record-batch pretty printer.
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------+
    | s |
    +--------------+
    | {a: 1, b: 2} |
    +--------------+
    ");
}
8294
#[test]
fn test_display_date64_large_values() {
    // A value that renders as an ordinary calendar date.
    let in_range = ScalarValue::Date64(Some(790179464505));
    assert_eq!(in_range.to_string(), "1995-01-15");

    // For this very large negative value the test expects an empty string
    // rather than a panic.
    let extreme = ScalarValue::Date64(Some(-790179464505600000));
    assert_eq!(extreme.to_string(), "");
}
8307
#[test]
fn test_struct_display_null() {
    // A NULL struct scalar (built via `new_null`) must display as "NULL",
    // even though its only field is declared non-nullable.
    let fields = vec![Field::new("a", DataType::Int32, false)];
    let s = ScalarStructBuilder::new_null(fields);
    assert_eq!(s.to_string(), "NULL");

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // Through the record-batch pretty printer the NULL row renders as an
    // empty cell.
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();

    assert_snapshot!(batches_to_string(&[batch]), @r"
    +---+
    | s |
    +---+
    | |
    +---+
    ");
}
8329
#[test]
fn test_map_display_and_debug() {
    // Builds a four-row map array (string keys, Int32 values) and pins its
    // Display, Debug and pretty-printed renderings.
    let string_builder = StringBuilder::new();
    let int_builder = Int32Builder::with_capacity(4);
    let mut builder = MapBuilder::new(None, string_builder, int_builder);
    // Row 0: {joe: 1}
    builder.keys().append_value("joe");
    builder.values().append_value(1);
    builder.append(true).unwrap();

    // Row 1: {blogs: 2, foo: 4}
    builder.keys().append_value("blogs");
    builder.values().append_value(2);
    builder.keys().append_value("foo");
    builder.values().append_value(4);
    builder.append(true).unwrap();
    // Row 2: valid but empty map (no entries appended before `append(true)`).
    builder.append(true).unwrap();
    // Row 3: NULL map.
    builder.append(false).unwrap();

    let map_value = ScalarValue::Map(Arc::new(builder.finish()));

    // Display omits quoting; Debug quotes both keys and values.
    assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
    assert_eq!(
        format!("{map_value:?}"),
        r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
    );

    let ScalarValue::Map(arr) = map_value else {
        panic!("Expected map");
    };

    // Same data through the record-batch pretty printer: one line per row,
    // with the NULL row rendered as an empty cell.
    let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------------+
    | m |
    +--------------------+
    | {joe: 1} |
    | {blogs: 2, foo: 4} |
    | {} |
    | |
    +--------------------+
    ");
}
8372
#[test]
fn test_binary_display() {
    // Display output for every binary flavour: NULL, short payloads printed
    // as upper-case hex, and an 11-byte payload whose rendering stops after
    // ten bytes and ends in "...".
    let eleven_bytes = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let cases = [
        (ScalarValue::Binary(None), "NULL"),
        (ScalarValue::Binary(Some(vec![42u8])), "2A"),
        (ScalarValue::Binary(Some(vec![1u8, 2, 3])), "010203"),
        (
            ScalarValue::Binary(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::BinaryView(None), "NULL"),
        (ScalarValue::BinaryView(Some(vec![1u8, 2, 3])), "010203"),
        (
            ScalarValue::BinaryView(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::LargeBinary(None), "NULL"),
        (ScalarValue::LargeBinary(Some(vec![1u8, 2, 3])), "010203"),
        (
            ScalarValue::LargeBinary(Some(eleven_bytes.clone())),
            "0102030405060708090A...",
        ),
        (ScalarValue::FixedSizeBinary(3, None), "NULL"),
        (
            ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3])),
            "010203",
        ),
        (
            ScalarValue::FixedSizeBinary(11, Some(eleven_bytes)),
            "0102030405060708090A...",
        ),
    ];

    for (value, rendered) in cases {
        assert_eq!(format!("{value}"), rendered);
    }
}
8411
#[test]
fn test_binary_debug() {
    // Debug output for every binary flavour: the variant name wrapping either
    // NULL or the full comma-separated list of decimal byte values (no
    // truncation, unlike Display).
    let eleven_bytes = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let cases = [
        (ScalarValue::Binary(None), "Binary(NULL)"),
        (ScalarValue::Binary(Some(vec![42u8])), "Binary(\"42\")"),
        (
            ScalarValue::Binary(Some(vec![1u8, 2, 3])),
            "Binary(\"1,2,3\")",
        ),
        (
            ScalarValue::Binary(Some(eleven_bytes.clone())),
            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::BinaryView(None), "BinaryView(NULL)"),
        (
            ScalarValue::BinaryView(Some(vec![1u8, 2, 3])),
            "BinaryView(\"1,2,3\")",
        ),
        (
            ScalarValue::BinaryView(Some(eleven_bytes.clone())),
            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::LargeBinary(None), "LargeBinary(NULL)"),
        (
            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3])),
            "LargeBinary(\"1,2,3\")",
        ),
        (
            ScalarValue::LargeBinary(Some(eleven_bytes.clone())),
            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (
            ScalarValue::FixedSizeBinary(3, None),
            "FixedSizeBinary(3, NULL)",
        ),
        (
            ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3])),
            "FixedSizeBinary(3, \"1,2,3\")",
        ),
        (
            ScalarValue::FixedSizeBinary(11, Some(eleven_bytes)),
            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
    ];

    for (value, rendered) in cases {
        assert_eq!(format!("{value:?}"), rendered);
    }
}
8465
#[test]
fn test_build_timestamp_millisecond_list() {
    // Wrapping one zone-less millisecond timestamp must give a one-row list.
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, None);
    let elements = [ScalarValue::TimestampMillisecond(Some(1), None)];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);

    assert_eq!(1, list.len());
}
8475
#[test]
fn test_newlist_timestamp_zone() {
    // The time zone of the element type must be carried into the list's
    // data type.
    let zone: &'static str = "UTC";
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, Some(zone.into()));
    let elements = [ScalarValue::TimestampMillisecond(
        Some(1),
        Some(zone.into()),
    )];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);
    assert_eq!(1, list.len());

    let expected_type =
        DataType::List(Arc::new(Field::new_list_field(element_type, true)));
    assert_eq!(list.data_type(), &expected_type);
}
8493
8494 fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
8495 let vector_size = sample_size;
8496 let mut timestamp = vec![];
8497 let mut rng = rand::rng();
8498 for i in 0..vector_size {
8499 let year = rng.random_range(1995..=2050);
8500 let month = rng.random_range(1..=12);
8501 let day = rng.random_range(1..=28); let hour = rng.random_range(0..=23);
8503 let minute = rng.random_range(0..=59);
8504 let second = rng.random_range(0..=59);
8505 if i % 4 == 0 {
8506 timestamp.push(ScalarValue::TimestampSecond(
8507 Some(
8508 NaiveDate::from_ymd_opt(year, month, day)
8509 .unwrap()
8510 .and_hms_opt(hour, minute, second)
8511 .unwrap()
8512 .and_utc()
8513 .timestamp(),
8514 ),
8515 None,
8516 ))
8517 } else if i % 4 == 1 {
8518 let millisec = rng.random_range(0..=999);
8519 timestamp.push(ScalarValue::TimestampMillisecond(
8520 Some(
8521 NaiveDate::from_ymd_opt(year, month, day)
8522 .unwrap()
8523 .and_hms_milli_opt(hour, minute, second, millisec)
8524 .unwrap()
8525 .and_utc()
8526 .timestamp_millis(),
8527 ),
8528 None,
8529 ))
8530 } else if i % 4 == 2 {
8531 let microsec = rng.random_range(0..=999_999);
8532 timestamp.push(ScalarValue::TimestampMicrosecond(
8533 Some(
8534 NaiveDate::from_ymd_opt(year, month, day)
8535 .unwrap()
8536 .and_hms_micro_opt(hour, minute, second, microsec)
8537 .unwrap()
8538 .and_utc()
8539 .timestamp_micros(),
8540 ),
8541 None,
8542 ))
8543 } else if i % 4 == 3 {
8544 let nanosec = rng.random_range(0..=999_999_999);
8545 timestamp.push(ScalarValue::TimestampNanosecond(
8546 Some(
8547 NaiveDate::from_ymd_opt(year, month, day)
8548 .unwrap()
8549 .and_hms_nano_opt(hour, minute, second, nanosec)
8550 .unwrap()
8551 .and_utc()
8552 .timestamp_nanos_opt()
8553 .unwrap(),
8554 ),
8555 None,
8556 ))
8557 }
8558 }
8559 timestamp
8560 }
8561
8562 fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
8563 const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
8564 const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
8565
8566 let vector_size = sample_size;
8567 let mut intervals = vec![];
8568 let mut rng = rand::rng();
8569 const SECS_IN_ONE_DAY: i32 = 86_400;
8570 const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
8571 for i in 0..vector_size {
8572 if i % 4 == 0 {
8573 let days = rng.random_range(0..5000);
8574 let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
8576 intervals.push(ScalarValue::new_interval_dt(days, millis));
8577 } else if i % 4 == 1 {
8578 let days = rng.random_range(0..5000);
8579 let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
8580 intervals.push(ScalarValue::new_interval_dt(days, millisec));
8581 } else if i % 4 == 2 {
8582 let days = rng.random_range(0..5000);
8583 let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
8585 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
8586 } else {
8587 let days = rng.random_range(0..5000);
8588 let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
8589 intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
8590 }
8591 }
8592 intervals
8593 }
8594
8595 fn union_fields() -> UnionFields {
8596 [
8597 (0, Arc::new(Field::new("A", DataType::Int32, true))),
8598 (1, Arc::new(Field::new("B", DataType::Float64, true))),
8599 ]
8600 .into_iter()
8601 .collect()
8602 }
8603
#[test]
fn sparse_scalar_union_is_null() {
    // A sparse union whose selected child (type id 0) is a NULL Int32 must
    // itself report as NULL.
    let null_child = Box::new(ScalarValue::Int32(None));
    let sparse_scalar =
        ScalarValue::Union(Some((0_i8, null_child)), union_fields(), UnionMode::Sparse);

    assert!(sparse_scalar.is_null());
}
8613
#[test]
fn dense_scalar_union_is_null() {
    // A dense union whose selected child (type id 0) is a NULL Int32 must
    // itself report as NULL.
    let null_child = Box::new(ScalarValue::Int32(None));
    let dense_scalar =
        ScalarValue::Union(Some((0_i8, null_child)), union_fields(), UnionMode::Dense);

    assert!(dense_scalar.is_null());
}
8623
#[test]
fn null_dictionary_scalar_produces_null_dictionary_array() {
    // Dictionary scalar with Int32 keys wrapping `ScalarValue::Null`.
    let key_type = Box::new(DataType::Int32);
    let null_value = Box::new(ScalarValue::Null);
    let dictionary_scalar = ScalarValue::Dictionary(key_type, null_value);
    assert!(dictionary_scalar.is_null());

    // The nullness must carry over to the array built from the scalar.
    let dictionary_array = dictionary_scalar.to_array().unwrap();
    assert!(dictionary_array.is_null(0));
}
8634
#[test]
fn test_scalar_value_try_new_null() {
    // `try_new_null` must succeed and yield a NULL scalar for each listed type.

    // Non-nested types.
    let flat_types = [
        DataType::Boolean,
        DataType::Int8,
        DataType::Int16,
        DataType::Int32,
        DataType::Int64,
        DataType::UInt8,
        DataType::UInt16,
        DataType::UInt32,
        DataType::UInt64,
        DataType::Float16,
        DataType::Float32,
        DataType::Float64,
        DataType::Decimal128(42, 42),
        DataType::Decimal256(42, 42),
        DataType::Utf8,
        DataType::LargeUtf8,
        DataType::Utf8View,
        DataType::Binary,
        DataType::BinaryView,
        DataType::FixedSizeBinary(42),
        DataType::LargeBinary,
        DataType::Date32,
        DataType::Date64,
        DataType::Time32(TimeUnit::Second),
        DataType::Time32(TimeUnit::Millisecond),
        DataType::Time64(TimeUnit::Microsecond),
        DataType::Time64(TimeUnit::Nanosecond),
        DataType::Timestamp(TimeUnit::Second, None),
        DataType::Timestamp(TimeUnit::Millisecond, None),
        DataType::Timestamp(TimeUnit::Microsecond, None),
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        DataType::Interval(IntervalUnit::YearMonth),
        DataType::Interval(IntervalUnit::DayTime),
        DataType::Interval(IntervalUnit::MonthDayNano),
        DataType::Duration(TimeUnit::Second),
        DataType::Duration(TimeUnit::Microsecond),
        DataType::Duration(TimeUnit::Nanosecond),
        DataType::Null,
    ];
    let scalars: Vec<ScalarValue> = flat_types
        .iter()
        .map(|data_type| ScalarValue::try_new_null(data_type).unwrap())
        .collect();
    assert!(scalars.iter().all(|s| s.is_null()));

    // Nested types built around a shared Int32 child field.
    let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
    let map_field_ref = Arc::new(Field::new(
        "foo",
        DataType::Struct(Fields::from(vec![
            Field::new("bar", DataType::Utf8, true),
            Field::new("baz", DataType::Int32, true),
        ])),
        true,
    ));
    let nested_types = [
        DataType::List(Arc::clone(&field_ref)),
        DataType::LargeList(Arc::clone(&field_ref)),
        DataType::FixedSizeList(Arc::clone(&field_ref), 42),
        DataType::Struct(vec![Arc::clone(&field_ref)].into()),
        DataType::Map(map_field_ref, false),
        DataType::Union(
            UnionFields::new(vec![42], vec![field_ref]),
            UnionMode::Dense,
        ),
    ];
    let scalars: Vec<ScalarValue> = nested_types
        .iter()
        .map(|data_type| ScalarValue::try_new_null(data_type).unwrap())
        .collect();
    assert!(scalars.iter().all(|s| s.is_null()));
}
8718
// Panics with a descriptive message unless `actual` begins with
// `expected_prefix`. Both arguments may be anything viewable as a `str`.
fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
    let (actual, expected_prefix) = (actual.as_ref(), expected_prefix.as_ref());
    if !actual.starts_with(expected_prefix) {
        panic!("Expected '{actual}' to start with '{expected_prefix}'");
    }
}
8729
#[test]
fn test_new_default() {
    // Types whose default can be compared directly: numeric zero, `false`,
    // empty string / byte vector, zero-filled fixed-size binary, and zero
    // date / time / decimal values.
    let simple_cases = [
        (DataType::Int32, ScalarValue::Int32(Some(0))),
        (DataType::Float64, ScalarValue::Float64(Some(0.0))),
        (DataType::Boolean, ScalarValue::Boolean(Some(false))),
        (DataType::Utf8, ScalarValue::Utf8(Some("".to_string()))),
        (
            DataType::LargeUtf8,
            ScalarValue::LargeUtf8(Some("".to_string())),
        ),
        (DataType::Binary, ScalarValue::Binary(Some(vec![]))),
        (
            DataType::FixedSizeBinary(5),
            ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0])),
        ),
        (DataType::Date32, ScalarValue::Date32(Some(0))),
        (
            DataType::Time32(TimeUnit::Second),
            ScalarValue::Time32Second(Some(0)),
        ),
        (
            DataType::Decimal128(10, 2),
            ScalarValue::Decimal128(Some(0), 10, 2),
        ),
    ];
    for (data_type, expected) in simple_cases {
        assert_eq!(ScalarValue::new_default(&data_type).unwrap(), expected);
    }

    // List: a single row holding an empty list.
    let list_type =
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
    let ScalarValue::List(list_arr) = ScalarValue::new_default(&list_type).unwrap()
    else {
        panic!("Expected List");
    };
    assert_eq!(list_arr.len(), 1);
    assert_eq!(list_arr.value_length(0), 0);

    // Struct: a single row whose fields hold their own defaults.
    let struct_type = DataType::Struct(Fields::from(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ]));
    let ScalarValue::Struct(struct_arr) =
        ScalarValue::new_default(&struct_type).unwrap()
    else {
        panic!("Expected Struct");
    };
    assert_eq!(struct_arr.len(), 1);
    assert_eq!(
        struct_arr.column(0).as_primitive::<Int32Type>().value(0),
        0
    );
    assert_eq!(struct_arr.column(1).as_string::<i32>().value(0), "");

    // Union: the first variant (type id 0) carrying that variant's default.
    let variants = UnionFields::new(
        vec![0, 1],
        vec![
            Field::new("i32", DataType::Int32, false),
            Field::new("f64", DataType::Float64, false),
        ],
    );
    let union_type = DataType::Union(variants, UnionMode::Sparse);
    let ScalarValue::Union(Some((type_id, value)), _, _) =
        ScalarValue::new_default(&union_type).unwrap()
    else {
        panic!("Expected Union");
    };
    assert_eq!(type_id, 0);
    assert_eq!(*value, ScalarValue::Int32(Some(0)));
}
8834
#[test]
fn test_scalar_min() {
    // (data type, expected result of `ScalarValue::min`); `None` marks types
    // for which the test expects no minimum.
    let cases = [
        // Integers
        (DataType::Int8, Some(ScalarValue::Int8(Some(i8::MIN)))),
        (DataType::Int32, Some(ScalarValue::Int32(Some(i32::MIN)))),
        (DataType::UInt8, Some(ScalarValue::UInt8(Some(0)))),
        (DataType::UInt64, Some(ScalarValue::UInt64(Some(0)))),
        // Floats: negative infinity
        (
            DataType::Float32,
            Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
        ),
        (
            DataType::Float64,
            Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
        ),
        // Decimal with precision 5: five nines, negated
        (
            DataType::Decimal128(5, 2),
            Some(ScalarValue::Decimal128(Some(-99999), 5, 2)),
        ),
        // Temporal types
        (DataType::Date32, Some(ScalarValue::Date32(Some(i32::MIN)))),
        (
            DataType::Time32(TimeUnit::Second),
            Some(ScalarValue::Time32Second(Some(0))),
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), None)),
        ),
        (
            DataType::Duration(TimeUnit::Second),
            Some(ScalarValue::DurationSecond(Some(i64::MIN))),
        ),
        // Types without a minimum
        (DataType::Utf8, None),
        (DataType::Binary, None),
        (
            DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
            None,
        ),
    ];

    for (data_type, expected) in cases {
        assert_eq!(ScalarValue::min(&data_type), expected);
    }
}
8906
#[test]
fn test_scalar_max() {
    // (data type, expected result of `ScalarValue::max`); `None` marks types
    // for which the test expects no maximum.
    let cases = [
        // Integers
        (DataType::Int8, Some(ScalarValue::Int8(Some(i8::MAX)))),
        (DataType::Int32, Some(ScalarValue::Int32(Some(i32::MAX)))),
        (DataType::UInt8, Some(ScalarValue::UInt8(Some(u8::MAX)))),
        (DataType::UInt64, Some(ScalarValue::UInt64(Some(u64::MAX)))),
        // Floats: positive infinity
        (
            DataType::Float32,
            Some(ScalarValue::Float32(Some(f32::INFINITY))),
        ),
        (
            DataType::Float64,
            Some(ScalarValue::Float64(Some(f64::INFINITY))),
        ),
        // Decimal with precision 5: five nines
        (
            DataType::Decimal128(5, 2),
            Some(ScalarValue::Decimal128(Some(99999), 5, 2)),
        ),
        // Temporal types; time-of-day tops out one unit before 24:00:00
        (DataType::Date32, Some(ScalarValue::Date32(Some(i32::MAX)))),
        (
            DataType::Time32(TimeUnit::Second),
            Some(ScalarValue::Time32Second(Some(86_399))),
        ),
        (
            DataType::Time64(TimeUnit::Microsecond),
            Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))),
        ),
        (
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), None)),
        ),
        (
            DataType::Duration(TimeUnit::Millisecond),
            Some(ScalarValue::DurationMillisecond(Some(i64::MAX))),
        ),
        // Types without a maximum
        (DataType::Utf8, None),
        (DataType::Binary, None),
        (
            DataType::Struct(Fields::from(vec![Field::new(
                "field",
                DataType::Int32,
                true,
            )])),
            None,
        ),
    ];

    for (data_type, expected) in cases {
        assert_eq!(ScalarValue::max(&data_type), expected);
    }
}
8982
#[test]
fn test_min_max_float16() {
    // Float16 bounds are the two infinities.
    let ScalarValue::Float16(Some(lowest)) =
        ScalarValue::min(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16");
    };
    assert_eq!(lowest, f16::NEG_INFINITY);

    let ScalarValue::Float16(Some(highest)) =
        ScalarValue::max(&DataType::Float16).unwrap()
    else {
        panic!("Expected Float16");
    };
    assert_eq!(highest, f16::INFINITY);
}
9002
#[test]
fn test_new_default_interval() {
    // Each interval unit paired with the zero-valued scalar that
    // `ScalarValue::new_default` is expected to return for it.
    let cases = [
        (
            IntervalUnit::YearMonth,
            ScalarValue::IntervalYearMonth(Some(0)),
        ),
        (
            IntervalUnit::DayTime,
            ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO)),
        ),
        (
            IntervalUnit::MonthDayNano,
            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO)),
        ),
    ];

    for (unit, expected) in cases {
        let default_value =
            ScalarValue::new_default(&DataType::Interval(unit)).unwrap();
        assert_eq!(default_value, expected);
    }
}
9021
#[test]
fn test_min_max_with_timezone() {
    let utc: Option<Arc<str>> = Some(Arc::from("UTC"));

    // The minimum of a zoned second-precision timestamp keeps the zone and
    // holds the smallest i64.
    let seconds_type = DataType::Timestamp(TimeUnit::Second, utc.clone());
    let ScalarValue::TimestampSecond(Some(lowest), Some(lowest_zone)) =
        ScalarValue::min(&seconds_type).unwrap()
    else {
        panic!("Expected TimestampSecond with timezone")
    };
    assert_eq!(lowest, i64::MIN);
    assert_eq!(lowest_zone.as_ref(), "UTC");

    // The maximum of a zoned millisecond-precision timestamp keeps the zone
    // and holds the largest i64.
    let millis_type = DataType::Timestamp(TimeUnit::Millisecond, utc);
    let ScalarValue::TimestampMillisecond(Some(highest), Some(highest_zone)) =
        ScalarValue::max(&millis_type).unwrap()
    else {
        panic!("Expected TimestampMillisecond with timezone")
    };
    assert_eq!(highest, i64::MAX);
    assert_eq!(highest_zone.as_ref(), "UTC");
}
9048
#[test]
fn test_convert_array_to_scalar_vec() {
    /// Wraps each optional `i64` in a `ScalarValue::Int64`.
    fn int64s(values: &[Option<i64>]) -> Vec<ScalarValue> {
        values.iter().copied().map(ScalarValue::Int64).collect()
    }

    /// Builds a `ListArray` over the child values `1, 2, 3, 4, 5, 6` with the
    /// given offsets and per-slot validity.
    fn list_over_one_to_six(offsets: Vec<i32>, validity: Vec<bool>) -> ListArray {
        ListArray::new(
            Field::new_list_field(DataType::Int64, true).into(),
            OffsetBuffer::new(offsets.into()),
            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
            Some(NullBuffer::from(validity)),
        )
    }

    // Input rows shared by the plain ListArray and LargeListArray cases,
    // together with the scalars they are expected to convert to.
    let rows: Vec<Option<Vec<Option<i64>>>> = vec![
        Some(vec![Some(1), Some(2)]),
        None,
        Some(vec![Some(3), None, Some(4)]),
    ];
    let rows_as_scalars = vec![
        Some(int64s(&[Some(1), Some(2)])),
        None,
        Some(int64s(&[Some(3), None, Some(4)])),
    ];

    // Case 1: ListArray built from nested options.
    let list = ListArray::from_iter_primitive::<Int64Type, _, _>(rows.clone());
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&list).unwrap(),
        rows_as_scalars
    );

    // Case 2: the same rows as a LargeListArray.
    let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(rows);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap(),
        rows_as_scalars
    );

    // Case 3: the null slot spans a non-empty offset range (2..4); the child
    // values under it are expected to be dropped in favour of `None`.
    let funky = list_over_one_to_six(vec![0, 2, 4, 5], vec![true, false, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&funky).unwrap(),
        vec![
            Some(int64s(&[Some(1), Some(2)])),
            None,
            Some(int64s(&[Some(5)])),
        ]
    );

    // Case 4: the null slot spans an empty offset range (2..2).
    let array4 = list_over_one_to_six(vec![0, 2, 2, 5], vec![true, false, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&array4).unwrap(),
        vec![
            Some(int64s(&[Some(1), Some(2)])),
            None,
            Some(int64s(&[Some(3), Some(4), Some(5)])),
        ]
    );

    // Case 5: same offsets as case 4 but every slot is valid, so the middle
    // slot is an empty (non-null) list.
    let array5 = list_over_one_to_six(vec![0, 2, 2, 5], vec![true, true, true]);
    assert_eq!(
        ScalarValue::convert_array_to_scalar_vec(&array5).unwrap(),
        vec![
            Some(int64s(&[Some(1), Some(2)])),
            Some(vec![]),
            Some(int64s(&[Some(3), Some(4), Some(5)])),
        ]
    );
}
9170}