datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32    doc_section(label = "Time and Date Functions"),
33    description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41    sql_example = r#"```sql
42> select to_date('2023-01-31');
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31                    |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31                                                          |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
57"#,
58    standard_argument(name = "expression", prefix = "String"),
59    argument(
60        name = "format_n",
61        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62  they appear with the first successful one being returned. If none of the formats successfully parse the expression
63  an error will be returned."
64    )
65)]
66#[derive(Debug, PartialEq, Eq, Hash)]
67pub struct ToDateFunc {
68    signature: Signature,
69}
70
71impl Default for ToDateFunc {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77impl ToDateFunc {
78    pub fn new() -> Self {
79        Self {
80            signature: Signature::variadic_any(Volatility::Immutable),
81        }
82    }
83
84    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85        match args.len() {
86            1 => handle::<Date32Type, _, Date32Type>(
87                args,
88                |s| match Date32Type::parse(s) {
89                    Some(v) => Ok(v),
90                    None => arrow_err!(ParseError(
91                        "Unable to cast to Date32 for converting from i64 to i32 failed"
92                            .to_string()
93                    )),
94                },
95                "to_date",
96            ),
97            2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98                args,
99                |s, format| {
100                    string_to_timestamp_millis_formatted(s, format)
101                        .map(|n| n / (24 * 60 * 60 * 1_000))
102                        .and_then(|v| {
103                            v.try_into().map_err(|_| {
104                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105                            })
106                        })
107                },
108                |n| n,
109                "to_date",
110            ),
111            0 => exec_err!("Unsupported 0 argument count for function to_date"),
112        }
113    }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117    fn as_any(&self) -> &dyn Any {
118        self
119    }
120
121    fn name(&self) -> &str {
122        "to_date"
123    }
124
125    fn signature(&self) -> &Signature {
126        &self.signature
127    }
128
129    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130        Ok(Date32)
131    }
132
133    fn invoke_with_args(
134        &self,
135        args: datafusion_expr::ScalarFunctionArgs,
136    ) -> Result<ColumnarValue> {
137        let args = args.args;
138        if args.is_empty() {
139            return exec_err!("to_date function requires 1 or more arguments, got 0");
140        }
141
142        // validate that any args after the first one are Utf8
143        if args.len() > 1 {
144            validate_data_types(&args, "to_date")?;
145        }
146
147        match args[0].data_type() {
148            Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149                args[0].cast_to(&Date32, None)
150            }
151            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152            other => {
153                exec_err!("Unsupported data type {} for function to_date", other)
154            }
155        }
156    }
157
158    fn documentation(&self) -> Option<&Documentation> {
159        self.doc()
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::ToDateFunc;
166    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
167    use arrow::datatypes::{DataType, Field};
168    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
169    use datafusion_common::config::ConfigOptions;
170    use datafusion_common::{DataFusionError, ScalarValue};
171    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
172    use std::sync::Arc;
173
174    fn invoke_to_date_with_args(
175        args: Vec<ColumnarValue>,
176        number_rows: usize,
177    ) -> Result<ColumnarValue, DataFusionError> {
178        let arg_fields = args
179            .iter()
180            .map(|arg| Field::new("a", arg.data_type(), true).into())
181            .collect::<Vec<_>>();
182
183        let args = datafusion_expr::ScalarFunctionArgs {
184            args,
185            arg_fields,
186            number_rows,
187            return_field: Field::new("f", DataType::Date32, true).into(),
188            config_options: Arc::new(ConfigOptions::default()),
189            lambdas: None,
190        };
191        ToDateFunc::new().invoke_with_args(args)
192    }
193
194    #[test]
195    fn test_to_date_without_format() {
196        struct TestCase {
197            name: &'static str,
198            date_str: &'static str,
199        }
200
201        let test_cases = vec![
202            TestCase {
203                name: "Largest four-digit year (9999)",
204                date_str: "9999-12-31",
205            },
206            TestCase {
207                name: "Year 1 (0001)",
208                date_str: "0001-12-31",
209            },
210            TestCase {
211                name: "Year before epoch (1969)",
212                date_str: "1969-01-01",
213            },
214            TestCase {
215                name: "Switch Julian/Gregorian calendar (1582-10-10)",
216                date_str: "1582-10-10",
217            },
218        ];
219
220        for tc in &test_cases {
221            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
222            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
223            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
224
225            test_array::<GenericStringArray<i32>>(tc);
226            test_array::<GenericStringArray<i64>>(tc);
227            test_array::<StringViewArray>(tc);
228        }
229
230        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
231            let to_date_result =
232                invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
233
234            match to_date_result {
235                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
236                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
237                    assert_eq!(
238                        date_val, expected,
239                        "{}: to_date created wrong value",
240                        tc.name
241                    );
242                }
243                _ => panic!("Could not convert '{}' to Date", tc.date_str),
244            }
245        }
246
247        fn test_array<A>(tc: &TestCase)
248        where
249            A: From<Vec<&'static str>> + Array + 'static,
250        {
251            let date_array = A::from(vec![tc.date_str]);
252            let batch_len = date_array.len();
253            let to_date_result = invoke_to_date_with_args(
254                vec![ColumnarValue::Array(Arc::new(date_array))],
255                batch_len,
256            );
257
258            match to_date_result {
259                Ok(ColumnarValue::Array(a)) => {
260                    assert_eq!(a.len(), 1);
261
262                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
263                    let mut builder = Date32Array::builder(4);
264                    builder.append_value(expected.unwrap());
265
266                    assert_eq!(
267                        &builder.finish() as &dyn Array,
268                        a.as_ref(),
269                        "{}: to_date created wrong value",
270                        tc.name
271                    );
272                }
273                _ => panic!("Could not convert '{}' to Date", tc.date_str),
274            }
275        }
276    }
277
278    #[test]
279    fn test_to_date_with_format() {
280        struct TestCase {
281            name: &'static str,
282            date_str: &'static str,
283            format_str: &'static str,
284            formatted_date: &'static str,
285        }
286
287        let test_cases = vec![
288            TestCase {
289                name: "Largest four-digit year (9999)",
290                date_str: "9999-12-31",
291                format_str: "%Y%m%d",
292                formatted_date: "99991231",
293            },
294            TestCase {
295                name: "Smallest four-digit year (-9999)",
296                date_str: "-9999-12-31",
297                format_str: "%Y/%m/%d",
298                formatted_date: "-9999/12/31",
299            },
300            TestCase {
301                name: "Year 1 (0001)",
302                date_str: "0001-12-31",
303                format_str: "%Y%m%d",
304                formatted_date: "00011231",
305            },
306            TestCase {
307                name: "Year before epoch (1969)",
308                date_str: "1969-01-01",
309                format_str: "%Y%m%d",
310                formatted_date: "19690101",
311            },
312            TestCase {
313                name: "Switch Julian/Gregorian calendar (1582-10-10)",
314                date_str: "1582-10-10",
315                format_str: "%Y%m%d",
316                formatted_date: "15821010",
317            },
318            TestCase {
319                name: "Negative Year, BC (-42-01-01)",
320                date_str: "-42-01-01",
321                format_str: "%Y/%m/%d",
322                formatted_date: "-42/01/01",
323            },
324        ];
325
326        for tc in &test_cases {
327            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
328            test_scalar(
329                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
330                tc,
331            );
332            test_scalar(
333                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
334                tc,
335            );
336
337            test_array::<GenericStringArray<i32>>(tc);
338            test_array::<GenericStringArray<i64>>(tc);
339            test_array::<StringViewArray>(tc);
340        }
341
342        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
343            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
344
345            let to_date_result = invoke_to_date_with_args(
346                vec![
347                    ColumnarValue::Scalar(sv),
348                    ColumnarValue::Scalar(format_scalar),
349                ],
350                1,
351            );
352
353            match to_date_result {
354                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
355                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
356                    assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
357                }
358                _ => panic!(
359                    "Could not convert '{}' with format string '{}'to Date",
360                    tc.date_str, tc.format_str
361                ),
362            }
363        }
364
365        fn test_array<A>(tc: &TestCase)
366        where
367            A: From<Vec<&'static str>> + Array + 'static,
368        {
369            let date_array = A::from(vec![tc.formatted_date]);
370            let format_array = A::from(vec![tc.format_str]);
371            let batch_len = date_array.len();
372
373            let to_date_result = invoke_to_date_with_args(
374                vec![
375                    ColumnarValue::Array(Arc::new(date_array)),
376                    ColumnarValue::Array(Arc::new(format_array)),
377                ],
378                batch_len,
379            );
380
381            match to_date_result {
382                Ok(ColumnarValue::Array(a)) => {
383                    assert_eq!(a.len(), 1);
384
385                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
386                    let mut builder = Date32Array::builder(4);
387                    builder.append_value(expected.unwrap());
388
389                    assert_eq!(
390                        &builder.finish() as &dyn Array, a.as_ref(),
391                        "{}: to_date created wrong value for date '{}' with format string '{}'",
392                        tc.name,
393                        tc.formatted_date,
394                        tc.format_str
395                    );
396                }
397                _ => panic!(
398                    "Could not convert '{}' with format string '{}'to Date: {:?}",
399                    tc.formatted_date, tc.format_str, to_date_result
400                ),
401            }
402        }
403    }
404
405    #[test]
406    fn test_to_date_multiple_format_strings() {
407        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
408        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
409        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
410
411        let to_date_result = invoke_to_date_with_args(
412            vec![
413                ColumnarValue::Scalar(formatted_date_scalar),
414                ColumnarValue::Scalar(format1_scalar),
415                ColumnarValue::Scalar(format2_scalar),
416            ],
417            1,
418        );
419
420        match to_date_result {
421            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
422                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
423                assert_eq!(
424                    date_val, expected,
425                    "to_date created wrong value for date with 2 format strings"
426                );
427            }
428            _ => panic!("Conversion failed",),
429        }
430    }
431
432    #[test]
433    fn test_to_date_from_timestamp() {
434        let test_cases = vec![
435            "2020-09-08T13:42:29Z",
436            "2020-09-08T13:42:29.190855-05:00",
437            "2020-09-08 12:13:29",
438        ];
439        for date_str in test_cases {
440            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
441
442            let to_date_result = invoke_to_date_with_args(
443                vec![ColumnarValue::Scalar(formatted_date_scalar)],
444                1,
445            );
446
447            match to_date_result {
448                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
449                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
450                    assert_eq!(date_val, expected, "to_date created wrong value");
451                }
452                _ => panic!("Conversion of {date_str} failed"),
453            }
454        }
455    }
456
457    #[test]
458    fn test_to_date_string_with_valid_number() {
459        let date_str = "20241231";
460        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
461
462        let to_date_result =
463            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
464
465        match to_date_result {
466            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
467                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
468                assert_eq!(
469                    date_val, expected,
470                    "to_date created wrong value for {date_str}"
471                );
472            }
473            _ => panic!("Conversion of {date_str} failed"),
474        }
475    }
476
477    #[test]
478    fn test_to_date_string_with_invalid_number() {
479        let date_str = "202412311";
480        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
481
482        let to_date_result =
483            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
484
485        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
486            panic!("Conversion of {date_str} succeeded, but should have failed. ");
487        }
488    }
489}