datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25    as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29    ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30    TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37    exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
38};
39use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
40use datafusion_expr::TypeSignature::Exact;
41use datafusion_expr::{
42    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50/// Represents the granularity for date truncation operations
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53    Microsecond,
54    Millisecond,
55    Second,
56    Minute,
57    Hour,
58    Day,
59    Week,
60    Month,
61    Quarter,
62    Year,
63}
64
65impl DateTruncGranularity {
66    /// List of all supported granularity values
67    /// Cannot use HashMap here as it would require lazy_static or once_cell,
68    /// Rust does not support const HashMap yet.
69    const SUPPORTED_GRANULARITIES: &[&str] = &[
70        "microsecond",
71        "millisecond",
72        "second",
73        "minute",
74        "hour",
75        "day",
76        "week",
77        "month",
78        "quarter",
79        "year",
80    ];
81
82    /// Parse a granularity string into a DateTruncGranularity enum
83    fn from_str(s: &str) -> Result<Self> {
84        // Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
85        match s.to_lowercase().as_str() {
86            "microsecond" => Ok(Self::Microsecond),
87            "millisecond" => Ok(Self::Millisecond),
88            "second" => Ok(Self::Second),
89            "minute" => Ok(Self::Minute),
90            "hour" => Ok(Self::Hour),
91            "day" => Ok(Self::Day),
92            "week" => Ok(Self::Week),
93            "month" => Ok(Self::Month),
94            "quarter" => Ok(Self::Quarter),
95            "year" => Ok(Self::Year),
96            _ => {
97                let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98                exec_err!(
99                    "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100                )
101            }
102        }
103    }
104
105    /// Returns true if this granularity can be handled with simple arithmetic
106    /// (fine granularity: second, minute, millisecond, microsecond)
107    fn is_fine_granularity(&self) -> bool {
108        matches!(
109            self,
110            Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111        )
112    }
113
114    /// Returns true if this granularity can be handled with simple arithmetic in UTC
115    /// (hour and day in addition to fine granularities)
116    fn is_fine_granularity_utc(&self) -> bool {
117        self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118    }
119}
120
121#[user_doc(
122    doc_section(label = "Time and Date Functions"),
123    description = "Truncates a timestamp value to a specified precision.",
124    syntax_example = "date_trunc(precision, expression)",
125    argument(
126        name = "precision",
127        description = r#"Time precision to truncate to. The following precisions are supported:
128
129    - year / YEAR
130    - quarter / QUARTER
131    - month / MONTH
132    - week / WEEK
133    - day / DAY
134    - hour / HOUR
135    - minute / MINUTE
136    - second / SECOND
137    - millisecond / MILLISECOND
138    - microsecond / MICROSECOND
139"#
140    ),
141    argument(
142        name = "expression",
143        description = "Time expression to operate on. Can be a constant, column, or function."
144    )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148    signature: Signature,
149    aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153    fn default() -> Self {
154        Self::new()
155    }
156}
157
158impl DateTruncFunc {
159    pub fn new() -> Self {
160        Self {
161            signature: Signature::one_of(
162                vec![
163                    Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164                    Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165                    Exact(vec![
166                        Utf8,
167                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168                    ]),
169                    Exact(vec![
170                        Utf8View,
171                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172                    ]),
173                    Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174                    Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175                    Exact(vec![
176                        Utf8,
177                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178                    ]),
179                    Exact(vec![
180                        Utf8View,
181                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182                    ]),
183                    Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184                    Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185                    Exact(vec![
186                        Utf8,
187                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188                    ]),
189                    Exact(vec![
190                        Utf8View,
191                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192                    ]),
193                    Exact(vec![Utf8, Timestamp(Second, None)]),
194                    Exact(vec![Utf8View, Timestamp(Second, None)]),
195                    Exact(vec![
196                        Utf8,
197                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198                    ]),
199                    Exact(vec![
200                        Utf8View,
201                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202                    ]),
203                ],
204                Volatility::Immutable,
205            ),
206            aliases: vec![String::from("datetrunc")],
207        }
208    }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212    fn as_any(&self) -> &dyn Any {
213        self
214    }
215
216    fn name(&self) -> &str {
217        "date_trunc"
218    }
219
220    fn signature(&self) -> &Signature {
221        &self.signature
222    }
223
224    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225        match &arg_types[1] {
226            Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227                Ok(Timestamp(Nanosecond, None))
228            }
229            Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230            Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231            Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232            Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233            _ => plan_err!(
234                "The date_trunc function can only accept timestamp as the second arg."
235            ),
236        }
237    }
238
239    fn invoke_with_args(
240        &self,
241        args: datafusion_expr::ScalarFunctionArgs,
242    ) -> Result<ColumnarValue> {
243        let args = args.args;
244        let (granularity, array) = (&args[0], &args[1]);
245
246        let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247            granularity
248        {
249            v.to_lowercase()
250        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251        {
252            v.to_lowercase()
253        } else {
254            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255        };
256
257        let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259        fn process_array<T: ArrowTimestampType>(
260            array: &dyn Array,
261            granularity: DateTruncGranularity,
262            tz_opt: &Option<Arc<str>>,
263        ) -> Result<ColumnarValue> {
264            let parsed_tz = parse_tz(tz_opt)?;
265            let array = as_primitive_array::<T>(array)?;
266
267            // fast path for fine granularity
268            // For modern timezones, it's correct to truncate "minute" in this way.
269            // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
270            // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
271            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
272            if granularity.is_fine_granularity()
273                || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274            {
275                let result = general_date_trunc_array_fine_granularity(
276                    T::UNIT,
277                    array,
278                    granularity,
279                )?;
280                return Ok(ColumnarValue::Array(result));
281            }
282
283            let array: PrimitiveArray<T> = array
284                .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
285                .with_timezone_opt(tz_opt.clone());
286            Ok(ColumnarValue::Array(Arc::new(array)))
287        }
288
289        fn process_scalar<T: ArrowTimestampType>(
290            v: &Option<i64>,
291            granularity: DateTruncGranularity,
292            tz_opt: &Option<Arc<str>>,
293        ) -> Result<ColumnarValue> {
294            let parsed_tz = parse_tz(tz_opt)?;
295            let value = if let Some(v) = v {
296                Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
297            } else {
298                None
299            };
300            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
301            Ok(ColumnarValue::Scalar(value))
302        }
303
304        Ok(match array {
305            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
306                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
307            }
308            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
309                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
310            }
311            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
312                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
313            }
314            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
315                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
316            }
317            ColumnarValue::Array(array) => {
318                let array_type = array.data_type();
319                if let Timestamp(unit, tz_opt) = array_type {
320                    match unit {
321                        Second => process_array::<TimestampSecondType>(
322                            array,
323                            granularity,
324                            tz_opt,
325                        )?,
326                        Millisecond => process_array::<TimestampMillisecondType>(
327                            array,
328                            granularity,
329                            tz_opt,
330                        )?,
331                        Microsecond => process_array::<TimestampMicrosecondType>(
332                            array,
333                            granularity,
334                            tz_opt,
335                        )?,
336                        Nanosecond => process_array::<TimestampNanosecondType>(
337                            array,
338                            granularity,
339                            tz_opt,
340                        )?,
341                    }
342                } else {
343                    return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
344                }
345            }
346            _ => {
347                return exec_err!(
348                    "second argument of `date_trunc` must be timestamp scalar or array"
349                );
350            }
351        })
352    }
353
354    fn aliases(&self) -> &[String] {
355        &self.aliases
356    }
357
358    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
359        // The DATE_TRUNC function preserves the order of its second argument.
360        let precision = &input[0];
361        let date_value = &input[1];
362
363        if precision.sort_properties.eq(&SortProperties::Singleton) {
364            Ok(date_value.sort_properties)
365        } else {
366            Ok(SortProperties::Unordered)
367        }
368    }
369    fn documentation(&self) -> Option<&Documentation> {
370        self.doc()
371    }
372}
373
374fn _date_trunc_coarse<T>(
375    granularity: DateTruncGranularity,
376    value: Option<T>,
377) -> Result<Option<T>>
378where
379    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
380{
381    let value = match granularity {
382        DateTruncGranularity::Millisecond => value,
383        DateTruncGranularity::Microsecond => value,
384        DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
385        DateTruncGranularity::Minute => value
386            .and_then(|d| d.with_nanosecond(0))
387            .and_then(|d| d.with_second(0)),
388        DateTruncGranularity::Hour => value
389            .and_then(|d| d.with_nanosecond(0))
390            .and_then(|d| d.with_second(0))
391            .and_then(|d| d.with_minute(0)),
392        DateTruncGranularity::Day => value
393            .and_then(|d| d.with_nanosecond(0))
394            .and_then(|d| d.with_second(0))
395            .and_then(|d| d.with_minute(0))
396            .and_then(|d| d.with_hour(0)),
397        DateTruncGranularity::Week => value
398            .and_then(|d| d.with_nanosecond(0))
399            .and_then(|d| d.with_second(0))
400            .and_then(|d| d.with_minute(0))
401            .and_then(|d| d.with_hour(0))
402            .map(|d| {
403                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
404            }),
405        DateTruncGranularity::Month => value
406            .and_then(|d| d.with_nanosecond(0))
407            .and_then(|d| d.with_second(0))
408            .and_then(|d| d.with_minute(0))
409            .and_then(|d| d.with_hour(0))
410            .and_then(|d| d.with_day0(0)),
411        DateTruncGranularity::Quarter => value
412            .and_then(|d| d.with_nanosecond(0))
413            .and_then(|d| d.with_second(0))
414            .and_then(|d| d.with_minute(0))
415            .and_then(|d| d.with_hour(0))
416            .and_then(|d| d.with_day0(0))
417            .and_then(|d| d.with_month(quarter_month(&d))),
418        DateTruncGranularity::Year => value
419            .and_then(|d| d.with_nanosecond(0))
420            .and_then(|d| d.with_second(0))
421            .and_then(|d| d.with_minute(0))
422            .and_then(|d| d.with_hour(0))
423            .and_then(|d| d.with_day0(0))
424            .and_then(|d| d.with_month0(0)),
425    };
426    Ok(value)
427}
428
429fn quarter_month<T>(date: &T) -> u32
430where
431    T: Datelike,
432{
433    1 + 3 * ((date.month() - 1) / 3)
434}
435
436fn _date_trunc_coarse_with_tz(
437    granularity: DateTruncGranularity,
438    value: Option<DateTime<Tz>>,
439) -> Result<Option<i64>> {
440    if let Some(value) = value {
441        let local = value.naive_local();
442        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
443        let truncated = truncated.and_then(|truncated| {
444            match truncated.and_local_timezone(value.timezone()) {
445                LocalResult::None => {
446                    // This can happen if the date_trunc operation moves the time into
447                    // an hour that doesn't exist due to daylight savings. On known example where
448                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
449                    // To account for this adjust the time by a few hours, convert to local time,
450                    // and then adjust the time back.
451                    truncated
452                        .sub(TimeDelta::try_hours(3).unwrap())
453                        .and_local_timezone(value.timezone())
454                        .single()
455                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
456                }
457                LocalResult::Single(datetime) => Some(datetime),
458                LocalResult::Ambiguous(datetime1, datetime2) => {
459                    // Because we are truncating from an equally or more specific time
460                    // the original time must have been within the ambiguous local time
461                    // period. Therefore the offset of one of these times should match the
462                    // offset of the original time.
463                    if datetime1.offset().fix() == value.offset().fix() {
464                        Some(datetime1)
465                    } else {
466                        Some(datetime2)
467                    }
468                }
469            }
470        });
471        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
472    } else {
473        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
474        Ok(None)
475    }
476}
477
478fn _date_trunc_coarse_without_tz(
479    granularity: DateTruncGranularity,
480    value: Option<NaiveDateTime>,
481) -> Result<Option<i64>> {
482    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
483    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
484}
485
486/// Truncates the single `value`, expressed in nanoseconds since the
487/// epoch, for granularities greater than 1 second, in taking into
488/// account that some granularities are not uniform durations of time
489/// (e.g. months are not always the same lengths, leap seconds, etc)
490fn date_trunc_coarse(
491    granularity: DateTruncGranularity,
492    value: i64,
493    tz: Option<Tz>,
494) -> Result<i64> {
495    let value = match tz {
496        Some(tz) => {
497            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
498            // and NaiveDateTime (ISO 8601) has no concept of timezones
499            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
500                .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
501            _date_trunc_coarse_with_tz(granularity, Some(value))
502        }
503        None => {
504            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
505            let value = timestamp_ns_to_datetime(value)
506                .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
507            _date_trunc_coarse_without_tz(granularity, Some(value))
508        }
509    }?;
510
511    // `with_x(0)` are infallible because `0` are always a valid
512    Ok(value.unwrap())
513}
514
515/// Fast path for fine granularities (hour and smaller) that can be handled
516/// with simple arithmetic operations without calendar complexity.
517///
518/// This function is timezone-agnostic and should only be used when:
519/// - No timezone is specified in the input, OR
520/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
521fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
522    tu: TimeUnit,
523    array: &PrimitiveArray<T>,
524    granularity: DateTruncGranularity,
525) -> Result<ArrayRef> {
526    let unit = match (tu, granularity) {
527        (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
528        (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
529        (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
530
531        (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
532        (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
533        (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
534        (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
535
536        (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
537        (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
538        (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
539        (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
540        (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
541
542        (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
543        (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
544        (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
545        (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
546        (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
547        (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
548        _ => None,
549    };
550
551    if let Some(unit) = unit {
552        let unit = unit.get();
553        let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
554            array
555                .values()
556                .iter()
557                .map(|v| *v - i64::rem_euclid(*v, unit)),
558            array.nulls().cloned(),
559        );
560        Ok(Arc::new(array))
561    } else {
562        // truncate to the same or smaller unit
563        Ok(Arc::new(array.clone()))
564    }
565}
566
567// truncates a single value with the given timeunit to the specified granularity
568fn general_date_trunc(
569    tu: TimeUnit,
570    value: i64,
571    tz: Option<Tz>,
572    granularity: DateTruncGranularity,
573) -> Result<i64, DataFusionError> {
574    let scale = match tu {
575        Second => 1_000_000_000,
576        Millisecond => 1_000_000,
577        Microsecond => 1_000,
578        Nanosecond => 1,
579    };
580
581    // convert to nanoseconds
582    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
583
584    let result = match tu {
585        Second => match granularity {
586            DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
587            _ => nano / 1_000_000_000,
588        },
589        Millisecond => match granularity {
590            DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
591            DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
592            _ => nano / 1_000_000,
593        },
594        Microsecond => match granularity {
595            DateTruncGranularity::Minute => {
596                nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
597            }
598            DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
599            DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
600            _ => nano / 1_000,
601        },
602        _ => match granularity {
603            DateTruncGranularity::Minute => {
604                nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
605            }
606            DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
607            DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
608            DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
609            _ => nano,
610        },
611    };
612    Ok(result)
613}
614
615fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
616    tz.as_ref()
617        .map(|tz| {
618            Tz::from_str(tz)
619                .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
620        })
621        .transpose()
622}
623
624#[cfg(test)]
625mod tests {
626    use std::sync::Arc;
627
628    use crate::datetime::date_trunc::{
629        date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
630    };
631
632    use arrow::array::cast::as_primitive_array;
633    use arrow::array::types::TimestampNanosecondType;
634    use arrow::array::{Array, TimestampNanosecondArray};
635    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
636    use arrow::datatypes::{DataType, Field, TimeUnit};
637    use datafusion_common::config::ConfigOptions;
638    use datafusion_common::ScalarValue;
639    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
640
641    #[test]
642    fn date_trunc_test() {
643        let cases = vec![
644            (
645                "2020-09-08T13:42:29.190855Z",
646                "second",
647                "2020-09-08T13:42:29.000000Z",
648            ),
649            (
650                "2020-09-08T13:42:29.190855Z",
651                "minute",
652                "2020-09-08T13:42:00.000000Z",
653            ),
654            (
655                "2020-09-08T13:42:29.190855Z",
656                "hour",
657                "2020-09-08T13:00:00.000000Z",
658            ),
659            (
660                "2020-09-08T13:42:29.190855Z",
661                "day",
662                "2020-09-08T00:00:00.000000Z",
663            ),
664            (
665                "2020-09-08T13:42:29.190855Z",
666                "week",
667                "2020-09-07T00:00:00.000000Z",
668            ),
669            (
670                "2020-09-08T13:42:29.190855Z",
671                "month",
672                "2020-09-01T00:00:00.000000Z",
673            ),
674            (
675                "2020-09-08T13:42:29.190855Z",
676                "year",
677                "2020-01-01T00:00:00.000000Z",
678            ),
679            // week
680            (
681                "2021-01-01T13:42:29.190855Z",
682                "week",
683                "2020-12-28T00:00:00.000000Z",
684            ),
685            (
686                "2020-01-01T13:42:29.190855Z",
687                "week",
688                "2019-12-30T00:00:00.000000Z",
689            ),
690            // quarter
691            (
692                "2020-01-01T13:42:29.190855Z",
693                "quarter",
694                "2020-01-01T00:00:00.000000Z",
695            ),
696            (
697                "2020-02-01T13:42:29.190855Z",
698                "quarter",
699                "2020-01-01T00:00:00.000000Z",
700            ),
701            (
702                "2020-03-01T13:42:29.190855Z",
703                "quarter",
704                "2020-01-01T00:00:00.000000Z",
705            ),
706            (
707                "2020-04-01T13:42:29.190855Z",
708                "quarter",
709                "2020-04-01T00:00:00.000000Z",
710            ),
711            (
712                "2020-08-01T13:42:29.190855Z",
713                "quarter",
714                "2020-07-01T00:00:00.000000Z",
715            ),
716            (
717                "2020-11-01T13:42:29.190855Z",
718                "quarter",
719                "2020-10-01T00:00:00.000000Z",
720            ),
721            (
722                "2020-12-01T13:42:29.190855Z",
723                "quarter",
724                "2020-10-01T00:00:00.000000Z",
725            ),
726        ];
727
728        cases.iter().for_each(|(original, granularity, expected)| {
729            let left = string_to_timestamp_nanos(original).unwrap();
730            let right = string_to_timestamp_nanos(expected).unwrap();
731            let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
732            let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
733            assert_eq!(result, right, "{original} = {expected}");
734        });
735    }
736
737    #[test]
738    fn test_date_trunc_timezones() {
739        let cases = [
740            (
741                vec![
742                    "2020-09-08T00:00:00Z",
743                    "2020-09-08T01:00:00Z",
744                    "2020-09-08T02:00:00Z",
745                    "2020-09-08T03:00:00Z",
746                    "2020-09-08T04:00:00Z",
747                ],
748                Some("+00".into()),
749                vec![
750                    "2020-09-08T00:00:00Z",
751                    "2020-09-08T00:00:00Z",
752                    "2020-09-08T00:00:00Z",
753                    "2020-09-08T00:00:00Z",
754                    "2020-09-08T00:00:00Z",
755                ],
756            ),
757            (
758                vec![
759                    "2020-09-08T00:00:00Z",
760                    "2020-09-08T01:00:00Z",
761                    "2020-09-08T02:00:00Z",
762                    "2020-09-08T03:00:00Z",
763                    "2020-09-08T04:00:00Z",
764                ],
765                None,
766                vec![
767                    "2020-09-08T00:00:00Z",
768                    "2020-09-08T00:00:00Z",
769                    "2020-09-08T00:00:00Z",
770                    "2020-09-08T00:00:00Z",
771                    "2020-09-08T00:00:00Z",
772                ],
773            ),
774            (
775                vec![
776                    "2020-09-08T00:00:00Z",
777                    "2020-09-08T01:00:00Z",
778                    "2020-09-08T02:00:00Z",
779                    "2020-09-08T03:00:00Z",
780                    "2020-09-08T04:00:00Z",
781                ],
782                Some("-02".into()),
783                vec![
784                    "2020-09-07T02:00:00Z",
785                    "2020-09-07T02:00:00Z",
786                    "2020-09-08T02:00:00Z",
787                    "2020-09-08T02:00:00Z",
788                    "2020-09-08T02:00:00Z",
789                ],
790            ),
791            (
792                vec![
793                    "2020-09-08T00:00:00+05",
794                    "2020-09-08T01:00:00+05",
795                    "2020-09-08T02:00:00+05",
796                    "2020-09-08T03:00:00+05",
797                    "2020-09-08T04:00:00+05",
798                ],
799                Some("+05".into()),
800                vec![
801                    "2020-09-08T00:00:00+05",
802                    "2020-09-08T00:00:00+05",
803                    "2020-09-08T00:00:00+05",
804                    "2020-09-08T00:00:00+05",
805                    "2020-09-08T00:00:00+05",
806                ],
807            ),
808            (
809                vec![
810                    "2020-09-08T00:00:00+08",
811                    "2020-09-08T01:00:00+08",
812                    "2020-09-08T02:00:00+08",
813                    "2020-09-08T03:00:00+08",
814                    "2020-09-08T04:00:00+08",
815                ],
816                Some("+08".into()),
817                vec![
818                    "2020-09-08T00:00:00+08",
819                    "2020-09-08T00:00:00+08",
820                    "2020-09-08T00:00:00+08",
821                    "2020-09-08T00:00:00+08",
822                    "2020-09-08T00:00:00+08",
823                ],
824            ),
825            (
826                vec![
827                    "2024-10-26T23:00:00Z",
828                    "2024-10-27T00:00:00Z",
829                    "2024-10-27T01:00:00Z",
830                    "2024-10-27T02:00:00Z",
831                ],
832                Some("Europe/Berlin".into()),
833                vec![
834                    "2024-10-27T00:00:00+02",
835                    "2024-10-27T00:00:00+02",
836                    "2024-10-27T00:00:00+02",
837                    "2024-10-27T00:00:00+02",
838                ],
839            ),
840            (
841                vec![
842                    "2018-02-18T00:00:00Z",
843                    "2018-02-18T01:00:00Z",
844                    "2018-02-18T02:00:00Z",
845                    "2018-02-18T03:00:00Z",
846                    "2018-11-04T01:00:00Z",
847                    "2018-11-04T02:00:00Z",
848                    "2018-11-04T03:00:00Z",
849                    "2018-11-04T04:00:00Z",
850                ],
851                Some("America/Sao_Paulo".into()),
852                vec![
853                    "2018-02-17T00:00:00-02",
854                    "2018-02-17T00:00:00-02",
855                    "2018-02-17T00:00:00-02",
856                    "2018-02-18T00:00:00-03",
857                    "2018-11-03T00:00:00-03",
858                    "2018-11-03T00:00:00-03",
859                    "2018-11-04T01:00:00-02",
860                    "2018-11-04T01:00:00-02",
861                ],
862            ),
863        ];
864
865        cases.iter().for_each(|(original, tz_opt, expected)| {
866            let input = original
867                .iter()
868                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
869                .collect::<TimestampNanosecondArray>()
870                .with_timezone_opt(tz_opt.clone());
871            let right = expected
872                .iter()
873                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
874                .collect::<TimestampNanosecondArray>()
875                .with_timezone_opt(tz_opt.clone());
876            let batch_len = input.len();
877            let arg_fields = vec![
878                Field::new("a", DataType::Utf8, false).into(),
879                Field::new("b", input.data_type().clone(), false).into(),
880            ];
881            let args = datafusion_expr::ScalarFunctionArgs {
882                args: vec![
883                    ColumnarValue::Scalar(ScalarValue::from("day")),
884                    ColumnarValue::Array(Arc::new(input)),
885                ],
886                arg_fields,
887                number_rows: batch_len,
888                return_field: Field::new(
889                    "f",
890                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
891                    true,
892                )
893                .into(),
894                config_options: Arc::new(ConfigOptions::default()),
895                lambdas: None,
896            };
897            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
898            if let ColumnarValue::Array(result) = result {
899                assert_eq!(
900                    result.data_type(),
901                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
902                );
903                let left = as_primitive_array::<TimestampNanosecondType>(&result);
904                assert_eq!(left, &right);
905            } else {
906                panic!("unexpected column type");
907            }
908        });
909    }
910
911    #[test]
912    fn test_date_trunc_hour_timezones() {
913        let cases = [
914            (
915                vec![
916                    "2020-09-08T00:30:00Z",
917                    "2020-09-08T01:30:00Z",
918                    "2020-09-08T02:30:00Z",
919                    "2020-09-08T03:30:00Z",
920                    "2020-09-08T04:30:00Z",
921                ],
922                Some("+00".into()),
923                vec![
924                    "2020-09-08T00:00:00Z",
925                    "2020-09-08T01:00:00Z",
926                    "2020-09-08T02:00:00Z",
927                    "2020-09-08T03:00:00Z",
928                    "2020-09-08T04:00:00Z",
929                ],
930            ),
931            (
932                vec![
933                    "2020-09-08T00:30:00Z",
934                    "2020-09-08T01:30:00Z",
935                    "2020-09-08T02:30:00Z",
936                    "2020-09-08T03:30:00Z",
937                    "2020-09-08T04:30:00Z",
938                ],
939                None,
940                vec![
941                    "2020-09-08T00:00:00Z",
942                    "2020-09-08T01:00:00Z",
943                    "2020-09-08T02:00:00Z",
944                    "2020-09-08T03:00:00Z",
945                    "2020-09-08T04:00:00Z",
946                ],
947            ),
948            (
949                vec![
950                    "2020-09-08T00:30:00Z",
951                    "2020-09-08T01:30:00Z",
952                    "2020-09-08T02:30:00Z",
953                    "2020-09-08T03:30:00Z",
954                    "2020-09-08T04:30:00Z",
955                ],
956                Some("-02".into()),
957                vec![
958                    "2020-09-08T00:00:00Z",
959                    "2020-09-08T01:00:00Z",
960                    "2020-09-08T02:00:00Z",
961                    "2020-09-08T03:00:00Z",
962                    "2020-09-08T04:00:00Z",
963                ],
964            ),
965            (
966                vec![
967                    "2020-09-08T00:30:00+05",
968                    "2020-09-08T01:30:00+05",
969                    "2020-09-08T02:30:00+05",
970                    "2020-09-08T03:30:00+05",
971                    "2020-09-08T04:30:00+05",
972                ],
973                Some("+05".into()),
974                vec![
975                    "2020-09-08T00:00:00+05",
976                    "2020-09-08T01:00:00+05",
977                    "2020-09-08T02:00:00+05",
978                    "2020-09-08T03:00:00+05",
979                    "2020-09-08T04:00:00+05",
980                ],
981            ),
982            (
983                vec![
984                    "2020-09-08T00:30:00+08",
985                    "2020-09-08T01:30:00+08",
986                    "2020-09-08T02:30:00+08",
987                    "2020-09-08T03:30:00+08",
988                    "2020-09-08T04:30:00+08",
989                ],
990                Some("+08".into()),
991                vec![
992                    "2020-09-08T00:00:00+08",
993                    "2020-09-08T01:00:00+08",
994                    "2020-09-08T02:00:00+08",
995                    "2020-09-08T03:00:00+08",
996                    "2020-09-08T04:00:00+08",
997                ],
998            ),
999            (
1000                vec![
1001                    "2024-10-26T23:30:00Z",
1002                    "2024-10-27T00:30:00Z",
1003                    "2024-10-27T01:30:00Z",
1004                    "2024-10-27T02:30:00Z",
1005                ],
1006                Some("Europe/Berlin".into()),
1007                vec![
1008                    "2024-10-27T01:00:00+02",
1009                    "2024-10-27T02:00:00+02",
1010                    "2024-10-27T02:00:00+01",
1011                    "2024-10-27T03:00:00+01",
1012                ],
1013            ),
1014            (
1015                vec![
1016                    "2018-02-18T00:30:00Z",
1017                    "2018-02-18T01:30:00Z",
1018                    "2018-02-18T02:30:00Z",
1019                    "2018-02-18T03:30:00Z",
1020                    "2018-11-04T01:00:00Z",
1021                    "2018-11-04T02:00:00Z",
1022                    "2018-11-04T03:00:00Z",
1023                    "2018-11-04T04:00:00Z",
1024                ],
1025                Some("America/Sao_Paulo".into()),
1026                vec![
1027                    "2018-02-17T22:00:00-02",
1028                    "2018-02-17T23:00:00-02",
1029                    "2018-02-17T23:00:00-03",
1030                    "2018-02-18T00:00:00-03",
1031                    "2018-11-03T22:00:00-03",
1032                    "2018-11-03T23:00:00-03",
1033                    "2018-11-04T01:00:00-02",
1034                    "2018-11-04T02:00:00-02",
1035                ],
1036            ),
1037            (
1038                vec![
1039                    "2024-10-26T23:30:00Z",
1040                    "2024-10-27T00:30:00Z",
1041                    "2024-10-27T01:30:00Z",
1042                    "2024-10-27T02:30:00Z",
1043                ],
1044                Some("Asia/Kathmandu".into()), // UTC+5:45
1045                vec![
1046                    "2024-10-27T05:00:00+05:45",
1047                    "2024-10-27T06:00:00+05:45",
1048                    "2024-10-27T07:00:00+05:45",
1049                    "2024-10-27T08:00:00+05:45",
1050                ],
1051            ),
1052        ];
1053
1054        cases.iter().for_each(|(original, tz_opt, expected)| {
1055            let input = original
1056                .iter()
1057                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1058                .collect::<TimestampNanosecondArray>()
1059                .with_timezone_opt(tz_opt.clone());
1060            let right = expected
1061                .iter()
1062                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1063                .collect::<TimestampNanosecondArray>()
1064                .with_timezone_opt(tz_opt.clone());
1065            let batch_len = input.len();
1066            let arg_fields = vec![
1067                Field::new("a", DataType::Utf8, false).into(),
1068                Field::new("b", input.data_type().clone(), false).into(),
1069            ];
1070            let args = datafusion_expr::ScalarFunctionArgs {
1071                args: vec![
1072                    ColumnarValue::Scalar(ScalarValue::from("hour")),
1073                    ColumnarValue::Array(Arc::new(input)),
1074                ],
1075                arg_fields,
1076                number_rows: batch_len,
1077                return_field: Field::new(
1078                    "f",
1079                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1080                    true,
1081                )
1082                .into(),
1083                config_options: Arc::new(ConfigOptions::default()),
1084                lambdas: None,
1085            };
1086            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1087            if let ColumnarValue::Array(result) = result {
1088                assert_eq!(
1089                    result.data_type(),
1090                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1091                );
1092                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1093                assert_eq!(left, &right);
1094            } else {
1095                panic!("unexpected column type");
1096            }
1097        });
1098    }
1099}