1use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25 as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29 ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30 TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37 exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
38};
39use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
40use datafusion_expr::TypeSignature::Exact;
41use datafusion_expr::{
42 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47 DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53 Microsecond,
54 Millisecond,
55 Second,
56 Minute,
57 Hour,
58 Day,
59 Week,
60 Month,
61 Quarter,
62 Year,
63}
64
65impl DateTruncGranularity {
66 const SUPPORTED_GRANULARITIES: &[&str] = &[
70 "microsecond",
71 "millisecond",
72 "second",
73 "minute",
74 "hour",
75 "day",
76 "week",
77 "month",
78 "quarter",
79 "year",
80 ];
81
82 fn from_str(s: &str) -> Result<Self> {
84 match s.to_lowercase().as_str() {
86 "microsecond" => Ok(Self::Microsecond),
87 "millisecond" => Ok(Self::Millisecond),
88 "second" => Ok(Self::Second),
89 "minute" => Ok(Self::Minute),
90 "hour" => Ok(Self::Hour),
91 "day" => Ok(Self::Day),
92 "week" => Ok(Self::Week),
93 "month" => Ok(Self::Month),
94 "quarter" => Ok(Self::Quarter),
95 "year" => Ok(Self::Year),
96 _ => {
97 let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98 exec_err!(
99 "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100 )
101 }
102 }
103 }
104
105 fn is_fine_granularity(&self) -> bool {
108 matches!(
109 self,
110 Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111 )
112 }
113
114 fn is_fine_granularity_utc(&self) -> bool {
117 self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118 }
119}
120
121#[user_doc(
122 doc_section(label = "Time and Date Functions"),
123 description = "Truncates a timestamp value to a specified precision.",
124 syntax_example = "date_trunc(precision, expression)",
125 argument(
126 name = "precision",
127 description = r#"Time precision to truncate to. The following precisions are supported:
128
129 - year / YEAR
130 - quarter / QUARTER
131 - month / MONTH
132 - week / WEEK
133 - day / DAY
134 - hour / HOUR
135 - minute / MINUTE
136 - second / SECOND
137 - millisecond / MILLISECOND
138 - microsecond / MICROSECOND
139"#
140 ),
141 argument(
142 name = "expression",
143 description = "Time expression to operate on. Can be a constant, column, or function."
144 )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148 signature: Signature,
149 aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153 fn default() -> Self {
154 Self::new()
155 }
156}
157
158impl DateTruncFunc {
159 pub fn new() -> Self {
160 Self {
161 signature: Signature::one_of(
162 vec![
163 Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164 Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165 Exact(vec![
166 Utf8,
167 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168 ]),
169 Exact(vec![
170 Utf8View,
171 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172 ]),
173 Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174 Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175 Exact(vec![
176 Utf8,
177 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178 ]),
179 Exact(vec![
180 Utf8View,
181 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182 ]),
183 Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184 Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185 Exact(vec![
186 Utf8,
187 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188 ]),
189 Exact(vec![
190 Utf8View,
191 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192 ]),
193 Exact(vec![Utf8, Timestamp(Second, None)]),
194 Exact(vec![Utf8View, Timestamp(Second, None)]),
195 Exact(vec![
196 Utf8,
197 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198 ]),
199 Exact(vec![
200 Utf8View,
201 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202 ]),
203 ],
204 Volatility::Immutable,
205 ),
206 aliases: vec![String::from("datetrunc")],
207 }
208 }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212 fn as_any(&self) -> &dyn Any {
213 self
214 }
215
216 fn name(&self) -> &str {
217 "date_trunc"
218 }
219
220 fn signature(&self) -> &Signature {
221 &self.signature
222 }
223
224 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225 match &arg_types[1] {
226 Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227 Ok(Timestamp(Nanosecond, None))
228 }
229 Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230 Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231 Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232 Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233 _ => plan_err!(
234 "The date_trunc function can only accept timestamp as the second arg."
235 ),
236 }
237 }
238
239 fn invoke_with_args(
240 &self,
241 args: datafusion_expr::ScalarFunctionArgs,
242 ) -> Result<ColumnarValue> {
243 let args = args.args;
244 let (granularity, array) = (&args[0], &args[1]);
245
246 let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247 granularity
248 {
249 v.to_lowercase()
250 } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251 {
252 v.to_lowercase()
253 } else {
254 return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255 };
256
257 let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259 fn process_array<T: ArrowTimestampType>(
260 array: &dyn Array,
261 granularity: DateTruncGranularity,
262 tz_opt: &Option<Arc<str>>,
263 ) -> Result<ColumnarValue> {
264 let parsed_tz = parse_tz(tz_opt)?;
265 let array = as_primitive_array::<T>(array)?;
266
267 if granularity.is_fine_granularity()
273 || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274 {
275 let result = general_date_trunc_array_fine_granularity(
276 T::UNIT,
277 array,
278 granularity,
279 )?;
280 return Ok(ColumnarValue::Array(result));
281 }
282
283 let array: PrimitiveArray<T> = array
284 .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
285 .with_timezone_opt(tz_opt.clone());
286 Ok(ColumnarValue::Array(Arc::new(array)))
287 }
288
289 fn process_scalar<T: ArrowTimestampType>(
290 v: &Option<i64>,
291 granularity: DateTruncGranularity,
292 tz_opt: &Option<Arc<str>>,
293 ) -> Result<ColumnarValue> {
294 let parsed_tz = parse_tz(tz_opt)?;
295 let value = if let Some(v) = v {
296 Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
297 } else {
298 None
299 };
300 let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
301 Ok(ColumnarValue::Scalar(value))
302 }
303
304 Ok(match array {
305 ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
306 process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
307 }
308 ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
309 process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
310 }
311 ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
312 process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
313 }
314 ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
315 process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
316 }
317 ColumnarValue::Array(array) => {
318 let array_type = array.data_type();
319 if let Timestamp(unit, tz_opt) = array_type {
320 match unit {
321 Second => process_array::<TimestampSecondType>(
322 array,
323 granularity,
324 tz_opt,
325 )?,
326 Millisecond => process_array::<TimestampMillisecondType>(
327 array,
328 granularity,
329 tz_opt,
330 )?,
331 Microsecond => process_array::<TimestampMicrosecondType>(
332 array,
333 granularity,
334 tz_opt,
335 )?,
336 Nanosecond => process_array::<TimestampNanosecondType>(
337 array,
338 granularity,
339 tz_opt,
340 )?,
341 }
342 } else {
343 return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
344 }
345 }
346 _ => {
347 return exec_err!(
348 "second argument of `date_trunc` must be timestamp scalar or array"
349 );
350 }
351 })
352 }
353
354 fn aliases(&self) -> &[String] {
355 &self.aliases
356 }
357
358 fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
359 let precision = &input[0];
361 let date_value = &input[1];
362
363 if precision.sort_properties.eq(&SortProperties::Singleton) {
364 Ok(date_value.sort_properties)
365 } else {
366 Ok(SortProperties::Unordered)
367 }
368 }
369 fn documentation(&self) -> Option<&Documentation> {
370 self.doc()
371 }
372}
373
374fn _date_trunc_coarse<T>(
375 granularity: DateTruncGranularity,
376 value: Option<T>,
377) -> Result<Option<T>>
378where
379 T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
380{
381 let value = match granularity {
382 DateTruncGranularity::Millisecond => value,
383 DateTruncGranularity::Microsecond => value,
384 DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
385 DateTruncGranularity::Minute => value
386 .and_then(|d| d.with_nanosecond(0))
387 .and_then(|d| d.with_second(0)),
388 DateTruncGranularity::Hour => value
389 .and_then(|d| d.with_nanosecond(0))
390 .and_then(|d| d.with_second(0))
391 .and_then(|d| d.with_minute(0)),
392 DateTruncGranularity::Day => value
393 .and_then(|d| d.with_nanosecond(0))
394 .and_then(|d| d.with_second(0))
395 .and_then(|d| d.with_minute(0))
396 .and_then(|d| d.with_hour(0)),
397 DateTruncGranularity::Week => value
398 .and_then(|d| d.with_nanosecond(0))
399 .and_then(|d| d.with_second(0))
400 .and_then(|d| d.with_minute(0))
401 .and_then(|d| d.with_hour(0))
402 .map(|d| {
403 d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
404 }),
405 DateTruncGranularity::Month => value
406 .and_then(|d| d.with_nanosecond(0))
407 .and_then(|d| d.with_second(0))
408 .and_then(|d| d.with_minute(0))
409 .and_then(|d| d.with_hour(0))
410 .and_then(|d| d.with_day0(0)),
411 DateTruncGranularity::Quarter => value
412 .and_then(|d| d.with_nanosecond(0))
413 .and_then(|d| d.with_second(0))
414 .and_then(|d| d.with_minute(0))
415 .and_then(|d| d.with_hour(0))
416 .and_then(|d| d.with_day0(0))
417 .and_then(|d| d.with_month(quarter_month(&d))),
418 DateTruncGranularity::Year => value
419 .and_then(|d| d.with_nanosecond(0))
420 .and_then(|d| d.with_second(0))
421 .and_then(|d| d.with_minute(0))
422 .and_then(|d| d.with_hour(0))
423 .and_then(|d| d.with_day0(0))
424 .and_then(|d| d.with_month0(0)),
425 };
426 Ok(value)
427}
428
429fn quarter_month<T>(date: &T) -> u32
430where
431 T: Datelike,
432{
433 1 + 3 * ((date.month() - 1) / 3)
434}
435
436fn _date_trunc_coarse_with_tz(
437 granularity: DateTruncGranularity,
438 value: Option<DateTime<Tz>>,
439) -> Result<Option<i64>> {
440 if let Some(value) = value {
441 let local = value.naive_local();
442 let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
443 let truncated = truncated.and_then(|truncated| {
444 match truncated.and_local_timezone(value.timezone()) {
445 LocalResult::None => {
446 truncated
452 .sub(TimeDelta::try_hours(3).unwrap())
453 .and_local_timezone(value.timezone())
454 .single()
455 .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
456 }
457 LocalResult::Single(datetime) => Some(datetime),
458 LocalResult::Ambiguous(datetime1, datetime2) => {
459 if datetime1.offset().fix() == value.offset().fix() {
464 Some(datetime1)
465 } else {
466 Some(datetime2)
467 }
468 }
469 }
470 });
471 Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
472 } else {
473 _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
474 Ok(None)
475 }
476}
477
478fn _date_trunc_coarse_without_tz(
479 granularity: DateTruncGranularity,
480 value: Option<NaiveDateTime>,
481) -> Result<Option<i64>> {
482 let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
483 Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
484}
485
486fn date_trunc_coarse(
491 granularity: DateTruncGranularity,
492 value: i64,
493 tz: Option<Tz>,
494) -> Result<i64> {
495 let value = match tz {
496 Some(tz) => {
497 let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
500 .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
501 _date_trunc_coarse_with_tz(granularity, Some(value))
502 }
503 None => {
504 let value = timestamp_ns_to_datetime(value)
506 .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
507 _date_trunc_coarse_without_tz(granularity, Some(value))
508 }
509 }?;
510
511 Ok(value.unwrap())
513}
514
515fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
522 tu: TimeUnit,
523 array: &PrimitiveArray<T>,
524 granularity: DateTruncGranularity,
525) -> Result<ArrayRef> {
526 let unit = match (tu, granularity) {
527 (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
528 (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
529 (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
530
531 (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
532 (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
533 (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
534 (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
535
536 (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
537 (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
538 (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
539 (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
540 (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
541
542 (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
543 (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
544 (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
545 (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
546 (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
547 (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
548 _ => None,
549 };
550
551 if let Some(unit) = unit {
552 let unit = unit.get();
553 let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
554 array
555 .values()
556 .iter()
557 .map(|v| *v - i64::rem_euclid(*v, unit)),
558 array.nulls().cloned(),
559 );
560 Ok(Arc::new(array))
561 } else {
562 Ok(Arc::new(array.clone()))
564 }
565}
566
567fn general_date_trunc(
569 tu: TimeUnit,
570 value: i64,
571 tz: Option<Tz>,
572 granularity: DateTruncGranularity,
573) -> Result<i64, DataFusionError> {
574 let scale = match tu {
575 Second => 1_000_000_000,
576 Millisecond => 1_000_000,
577 Microsecond => 1_000,
578 Nanosecond => 1,
579 };
580
581 let nano = date_trunc_coarse(granularity, scale * value, tz)?;
583
584 let result = match tu {
585 Second => match granularity {
586 DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
587 _ => nano / 1_000_000_000,
588 },
589 Millisecond => match granularity {
590 DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
591 DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
592 _ => nano / 1_000_000,
593 },
594 Microsecond => match granularity {
595 DateTruncGranularity::Minute => {
596 nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
597 }
598 DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
599 DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
600 _ => nano / 1_000,
601 },
602 _ => match granularity {
603 DateTruncGranularity::Minute => {
604 nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
605 }
606 DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
607 DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
608 DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
609 _ => nano,
610 },
611 };
612 Ok(result)
613}
614
615fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
616 tz.as_ref()
617 .map(|tz| {
618 Tz::from_str(tz)
619 .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
620 })
621 .transpose()
622}
623
624#[cfg(test)]
625mod tests {
626 use std::sync::Arc;
627
628 use crate::datetime::date_trunc::{
629 date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
630 };
631
632 use arrow::array::cast::as_primitive_array;
633 use arrow::array::types::TimestampNanosecondType;
634 use arrow::array::{Array, TimestampNanosecondArray};
635 use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
636 use arrow::datatypes::{DataType, Field, TimeUnit};
637 use datafusion_common::config::ConfigOptions;
638 use datafusion_common::ScalarValue;
639 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
640
641 #[test]
642 fn date_trunc_test() {
643 let cases = vec![
644 (
645 "2020-09-08T13:42:29.190855Z",
646 "second",
647 "2020-09-08T13:42:29.000000Z",
648 ),
649 (
650 "2020-09-08T13:42:29.190855Z",
651 "minute",
652 "2020-09-08T13:42:00.000000Z",
653 ),
654 (
655 "2020-09-08T13:42:29.190855Z",
656 "hour",
657 "2020-09-08T13:00:00.000000Z",
658 ),
659 (
660 "2020-09-08T13:42:29.190855Z",
661 "day",
662 "2020-09-08T00:00:00.000000Z",
663 ),
664 (
665 "2020-09-08T13:42:29.190855Z",
666 "week",
667 "2020-09-07T00:00:00.000000Z",
668 ),
669 (
670 "2020-09-08T13:42:29.190855Z",
671 "month",
672 "2020-09-01T00:00:00.000000Z",
673 ),
674 (
675 "2020-09-08T13:42:29.190855Z",
676 "year",
677 "2020-01-01T00:00:00.000000Z",
678 ),
679 (
681 "2021-01-01T13:42:29.190855Z",
682 "week",
683 "2020-12-28T00:00:00.000000Z",
684 ),
685 (
686 "2020-01-01T13:42:29.190855Z",
687 "week",
688 "2019-12-30T00:00:00.000000Z",
689 ),
690 (
692 "2020-01-01T13:42:29.190855Z",
693 "quarter",
694 "2020-01-01T00:00:00.000000Z",
695 ),
696 (
697 "2020-02-01T13:42:29.190855Z",
698 "quarter",
699 "2020-01-01T00:00:00.000000Z",
700 ),
701 (
702 "2020-03-01T13:42:29.190855Z",
703 "quarter",
704 "2020-01-01T00:00:00.000000Z",
705 ),
706 (
707 "2020-04-01T13:42:29.190855Z",
708 "quarter",
709 "2020-04-01T00:00:00.000000Z",
710 ),
711 (
712 "2020-08-01T13:42:29.190855Z",
713 "quarter",
714 "2020-07-01T00:00:00.000000Z",
715 ),
716 (
717 "2020-11-01T13:42:29.190855Z",
718 "quarter",
719 "2020-10-01T00:00:00.000000Z",
720 ),
721 (
722 "2020-12-01T13:42:29.190855Z",
723 "quarter",
724 "2020-10-01T00:00:00.000000Z",
725 ),
726 ];
727
728 cases.iter().for_each(|(original, granularity, expected)| {
729 let left = string_to_timestamp_nanos(original).unwrap();
730 let right = string_to_timestamp_nanos(expected).unwrap();
731 let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
732 let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
733 assert_eq!(result, right, "{original} = {expected}");
734 });
735 }
736
737 #[test]
738 fn test_date_trunc_timezones() {
739 let cases = [
740 (
741 vec![
742 "2020-09-08T00:00:00Z",
743 "2020-09-08T01:00:00Z",
744 "2020-09-08T02:00:00Z",
745 "2020-09-08T03:00:00Z",
746 "2020-09-08T04:00:00Z",
747 ],
748 Some("+00".into()),
749 vec![
750 "2020-09-08T00:00:00Z",
751 "2020-09-08T00:00:00Z",
752 "2020-09-08T00:00:00Z",
753 "2020-09-08T00:00:00Z",
754 "2020-09-08T00:00:00Z",
755 ],
756 ),
757 (
758 vec![
759 "2020-09-08T00:00:00Z",
760 "2020-09-08T01:00:00Z",
761 "2020-09-08T02:00:00Z",
762 "2020-09-08T03:00:00Z",
763 "2020-09-08T04:00:00Z",
764 ],
765 None,
766 vec![
767 "2020-09-08T00:00:00Z",
768 "2020-09-08T00:00:00Z",
769 "2020-09-08T00:00:00Z",
770 "2020-09-08T00:00:00Z",
771 "2020-09-08T00:00:00Z",
772 ],
773 ),
774 (
775 vec![
776 "2020-09-08T00:00:00Z",
777 "2020-09-08T01:00:00Z",
778 "2020-09-08T02:00:00Z",
779 "2020-09-08T03:00:00Z",
780 "2020-09-08T04:00:00Z",
781 ],
782 Some("-02".into()),
783 vec![
784 "2020-09-07T02:00:00Z",
785 "2020-09-07T02:00:00Z",
786 "2020-09-08T02:00:00Z",
787 "2020-09-08T02:00:00Z",
788 "2020-09-08T02:00:00Z",
789 ],
790 ),
791 (
792 vec![
793 "2020-09-08T00:00:00+05",
794 "2020-09-08T01:00:00+05",
795 "2020-09-08T02:00:00+05",
796 "2020-09-08T03:00:00+05",
797 "2020-09-08T04:00:00+05",
798 ],
799 Some("+05".into()),
800 vec![
801 "2020-09-08T00:00:00+05",
802 "2020-09-08T00:00:00+05",
803 "2020-09-08T00:00:00+05",
804 "2020-09-08T00:00:00+05",
805 "2020-09-08T00:00:00+05",
806 ],
807 ),
808 (
809 vec![
810 "2020-09-08T00:00:00+08",
811 "2020-09-08T01:00:00+08",
812 "2020-09-08T02:00:00+08",
813 "2020-09-08T03:00:00+08",
814 "2020-09-08T04:00:00+08",
815 ],
816 Some("+08".into()),
817 vec![
818 "2020-09-08T00:00:00+08",
819 "2020-09-08T00:00:00+08",
820 "2020-09-08T00:00:00+08",
821 "2020-09-08T00:00:00+08",
822 "2020-09-08T00:00:00+08",
823 ],
824 ),
825 (
826 vec![
827 "2024-10-26T23:00:00Z",
828 "2024-10-27T00:00:00Z",
829 "2024-10-27T01:00:00Z",
830 "2024-10-27T02:00:00Z",
831 ],
832 Some("Europe/Berlin".into()),
833 vec![
834 "2024-10-27T00:00:00+02",
835 "2024-10-27T00:00:00+02",
836 "2024-10-27T00:00:00+02",
837 "2024-10-27T00:00:00+02",
838 ],
839 ),
840 (
841 vec![
842 "2018-02-18T00:00:00Z",
843 "2018-02-18T01:00:00Z",
844 "2018-02-18T02:00:00Z",
845 "2018-02-18T03:00:00Z",
846 "2018-11-04T01:00:00Z",
847 "2018-11-04T02:00:00Z",
848 "2018-11-04T03:00:00Z",
849 "2018-11-04T04:00:00Z",
850 ],
851 Some("America/Sao_Paulo".into()),
852 vec![
853 "2018-02-17T00:00:00-02",
854 "2018-02-17T00:00:00-02",
855 "2018-02-17T00:00:00-02",
856 "2018-02-18T00:00:00-03",
857 "2018-11-03T00:00:00-03",
858 "2018-11-03T00:00:00-03",
859 "2018-11-04T01:00:00-02",
860 "2018-11-04T01:00:00-02",
861 ],
862 ),
863 ];
864
865 cases.iter().for_each(|(original, tz_opt, expected)| {
866 let input = original
867 .iter()
868 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
869 .collect::<TimestampNanosecondArray>()
870 .with_timezone_opt(tz_opt.clone());
871 let right = expected
872 .iter()
873 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
874 .collect::<TimestampNanosecondArray>()
875 .with_timezone_opt(tz_opt.clone());
876 let batch_len = input.len();
877 let arg_fields = vec![
878 Field::new("a", DataType::Utf8, false).into(),
879 Field::new("b", input.data_type().clone(), false).into(),
880 ];
881 let args = datafusion_expr::ScalarFunctionArgs {
882 args: vec![
883 ColumnarValue::Scalar(ScalarValue::from("day")),
884 ColumnarValue::Array(Arc::new(input)),
885 ],
886 arg_fields,
887 number_rows: batch_len,
888 return_field: Field::new(
889 "f",
890 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
891 true,
892 )
893 .into(),
894 config_options: Arc::new(ConfigOptions::default()),
895 lambdas: None,
896 };
897 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
898 if let ColumnarValue::Array(result) = result {
899 assert_eq!(
900 result.data_type(),
901 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
902 );
903 let left = as_primitive_array::<TimestampNanosecondType>(&result);
904 assert_eq!(left, &right);
905 } else {
906 panic!("unexpected column type");
907 }
908 });
909 }
910
911 #[test]
912 fn test_date_trunc_hour_timezones() {
913 let cases = [
914 (
915 vec![
916 "2020-09-08T00:30:00Z",
917 "2020-09-08T01:30:00Z",
918 "2020-09-08T02:30:00Z",
919 "2020-09-08T03:30:00Z",
920 "2020-09-08T04:30:00Z",
921 ],
922 Some("+00".into()),
923 vec![
924 "2020-09-08T00:00:00Z",
925 "2020-09-08T01:00:00Z",
926 "2020-09-08T02:00:00Z",
927 "2020-09-08T03:00:00Z",
928 "2020-09-08T04:00:00Z",
929 ],
930 ),
931 (
932 vec![
933 "2020-09-08T00:30:00Z",
934 "2020-09-08T01:30:00Z",
935 "2020-09-08T02:30:00Z",
936 "2020-09-08T03:30:00Z",
937 "2020-09-08T04:30:00Z",
938 ],
939 None,
940 vec![
941 "2020-09-08T00:00:00Z",
942 "2020-09-08T01:00:00Z",
943 "2020-09-08T02:00:00Z",
944 "2020-09-08T03:00:00Z",
945 "2020-09-08T04:00:00Z",
946 ],
947 ),
948 (
949 vec![
950 "2020-09-08T00:30:00Z",
951 "2020-09-08T01:30:00Z",
952 "2020-09-08T02:30:00Z",
953 "2020-09-08T03:30:00Z",
954 "2020-09-08T04:30:00Z",
955 ],
956 Some("-02".into()),
957 vec![
958 "2020-09-08T00:00:00Z",
959 "2020-09-08T01:00:00Z",
960 "2020-09-08T02:00:00Z",
961 "2020-09-08T03:00:00Z",
962 "2020-09-08T04:00:00Z",
963 ],
964 ),
965 (
966 vec![
967 "2020-09-08T00:30:00+05",
968 "2020-09-08T01:30:00+05",
969 "2020-09-08T02:30:00+05",
970 "2020-09-08T03:30:00+05",
971 "2020-09-08T04:30:00+05",
972 ],
973 Some("+05".into()),
974 vec![
975 "2020-09-08T00:00:00+05",
976 "2020-09-08T01:00:00+05",
977 "2020-09-08T02:00:00+05",
978 "2020-09-08T03:00:00+05",
979 "2020-09-08T04:00:00+05",
980 ],
981 ),
982 (
983 vec![
984 "2020-09-08T00:30:00+08",
985 "2020-09-08T01:30:00+08",
986 "2020-09-08T02:30:00+08",
987 "2020-09-08T03:30:00+08",
988 "2020-09-08T04:30:00+08",
989 ],
990 Some("+08".into()),
991 vec![
992 "2020-09-08T00:00:00+08",
993 "2020-09-08T01:00:00+08",
994 "2020-09-08T02:00:00+08",
995 "2020-09-08T03:00:00+08",
996 "2020-09-08T04:00:00+08",
997 ],
998 ),
999 (
1000 vec![
1001 "2024-10-26T23:30:00Z",
1002 "2024-10-27T00:30:00Z",
1003 "2024-10-27T01:30:00Z",
1004 "2024-10-27T02:30:00Z",
1005 ],
1006 Some("Europe/Berlin".into()),
1007 vec![
1008 "2024-10-27T01:00:00+02",
1009 "2024-10-27T02:00:00+02",
1010 "2024-10-27T02:00:00+01",
1011 "2024-10-27T03:00:00+01",
1012 ],
1013 ),
1014 (
1015 vec![
1016 "2018-02-18T00:30:00Z",
1017 "2018-02-18T01:30:00Z",
1018 "2018-02-18T02:30:00Z",
1019 "2018-02-18T03:30:00Z",
1020 "2018-11-04T01:00:00Z",
1021 "2018-11-04T02:00:00Z",
1022 "2018-11-04T03:00:00Z",
1023 "2018-11-04T04:00:00Z",
1024 ],
1025 Some("America/Sao_Paulo".into()),
1026 vec![
1027 "2018-02-17T22:00:00-02",
1028 "2018-02-17T23:00:00-02",
1029 "2018-02-17T23:00:00-03",
1030 "2018-02-18T00:00:00-03",
1031 "2018-11-03T22:00:00-03",
1032 "2018-11-03T23:00:00-03",
1033 "2018-11-04T01:00:00-02",
1034 "2018-11-04T02:00:00-02",
1035 ],
1036 ),
1037 (
1038 vec![
1039 "2024-10-26T23:30:00Z",
1040 "2024-10-27T00:30:00Z",
1041 "2024-10-27T01:30:00Z",
1042 "2024-10-27T02:30:00Z",
1043 ],
1044 Some("Asia/Kathmandu".into()), vec![
1046 "2024-10-27T05:00:00+05:45",
1047 "2024-10-27T06:00:00+05:45",
1048 "2024-10-27T07:00:00+05:45",
1049 "2024-10-27T08:00:00+05:45",
1050 ],
1051 ),
1052 ];
1053
1054 cases.iter().for_each(|(original, tz_opt, expected)| {
1055 let input = original
1056 .iter()
1057 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1058 .collect::<TimestampNanosecondArray>()
1059 .with_timezone_opt(tz_opt.clone());
1060 let right = expected
1061 .iter()
1062 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1063 .collect::<TimestampNanosecondArray>()
1064 .with_timezone_opt(tz_opt.clone());
1065 let batch_len = input.len();
1066 let arg_fields = vec![
1067 Field::new("a", DataType::Utf8, false).into(),
1068 Field::new("b", input.data_type().clone(), false).into(),
1069 ];
1070 let args = datafusion_expr::ScalarFunctionArgs {
1071 args: vec![
1072 ColumnarValue::Scalar(ScalarValue::from("hour")),
1073 ColumnarValue::Array(Arc::new(input)),
1074 ],
1075 arg_fields,
1076 number_rows: batch_len,
1077 return_field: Field::new(
1078 "f",
1079 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1080 true,
1081 )
1082 .into(),
1083 config_options: Arc::new(ConfigOptions::default()),
1084 lambdas: None,
1085 };
1086 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1087 if let ColumnarValue::Array(result) = result {
1088 assert_eq!(
1089 result.data_type(),
1090 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1091 );
1092 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1093 assert_eq!(left, &right);
1094 } else {
1095 panic!("unexpected column type");
1096 }
1097 });
1098 }
1099}