1use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32 doc_section(label = "Time and Date Functions"),
33 description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41 sql_example = r#"```sql
42> select to_date('2023-01-31');
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31 |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31 |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
57"#,
58 standard_argument(name = "expression", prefix = "String"),
59 argument(
60 name = "format_n",
61 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62 they appear with the first successful one being returned. If none of the formats successfully parse the expression
63 an error will be returned."
64 )
65)]
66#[derive(Debug, PartialEq, Eq, Hash)]
67pub struct ToDateFunc {
68 signature: Signature,
69}
70
71impl Default for ToDateFunc {
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
77impl ToDateFunc {
78 pub fn new() -> Self {
79 Self {
80 signature: Signature::variadic_any(Volatility::Immutable),
81 }
82 }
83
84 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85 match args.len() {
86 1 => handle::<Date32Type, _, Date32Type>(
87 args,
88 |s| match Date32Type::parse(s) {
89 Some(v) => Ok(v),
90 None => arrow_err!(ParseError(
91 "Unable to cast to Date32 for converting from i64 to i32 failed"
92 .to_string()
93 )),
94 },
95 "to_date",
96 ),
97 2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98 args,
99 |s, format| {
100 string_to_timestamp_millis_formatted(s, format)
101 .map(|n| n / (24 * 60 * 60 * 1_000))
102 .and_then(|v| {
103 v.try_into().map_err(|_| {
104 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105 })
106 })
107 },
108 |n| n,
109 "to_date",
110 ),
111 0 => exec_err!("Unsupported 0 argument count for function to_date"),
112 }
113 }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117 fn as_any(&self) -> &dyn Any {
118 self
119 }
120
121 fn name(&self) -> &str {
122 "to_date"
123 }
124
125 fn signature(&self) -> &Signature {
126 &self.signature
127 }
128
129 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130 Ok(Date32)
131 }
132
133 fn invoke_with_args(
134 &self,
135 args: datafusion_expr::ScalarFunctionArgs,
136 ) -> Result<ColumnarValue> {
137 let args = args.args;
138 if args.is_empty() {
139 return exec_err!("to_date function requires 1 or more arguments, got 0");
140 }
141
142 if args.len() > 1 {
144 validate_data_types(&args, "to_date")?;
145 }
146
147 match args[0].data_type() {
148 Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149 args[0].cast_to(&Date32, None)
150 }
151 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152 other => {
153 exec_err!("Unsupported data type {} for function to_date", other)
154 }
155 }
156 }
157
158 fn documentation(&self) -> Option<&Documentation> {
159 self.doc()
160 }
161}
162
163#[cfg(test)]
164mod tests {
165 use super::ToDateFunc;
166 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
167 use arrow::datatypes::{DataType, Field};
168 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
169 use datafusion_common::config::ConfigOptions;
170 use datafusion_common::{DataFusionError, ScalarValue};
171 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
172 use std::sync::Arc;
173
174 fn invoke_to_date_with_args(
175 args: Vec<ColumnarValue>,
176 number_rows: usize,
177 ) -> Result<ColumnarValue, DataFusionError> {
178 let arg_fields = args
179 .iter()
180 .map(|arg| Field::new("a", arg.data_type(), true).into())
181 .collect::<Vec<_>>();
182
183 let args = datafusion_expr::ScalarFunctionArgs {
184 args,
185 arg_fields,
186 number_rows,
187 return_field: Field::new("f", DataType::Date32, true).into(),
188 config_options: Arc::new(ConfigOptions::default()),
189 lambdas: None,
190 };
191 ToDateFunc::new().invoke_with_args(args)
192 }
193
194 #[test]
195 fn test_to_date_without_format() {
196 struct TestCase {
197 name: &'static str,
198 date_str: &'static str,
199 }
200
201 let test_cases = vec![
202 TestCase {
203 name: "Largest four-digit year (9999)",
204 date_str: "9999-12-31",
205 },
206 TestCase {
207 name: "Year 1 (0001)",
208 date_str: "0001-12-31",
209 },
210 TestCase {
211 name: "Year before epoch (1969)",
212 date_str: "1969-01-01",
213 },
214 TestCase {
215 name: "Switch Julian/Gregorian calendar (1582-10-10)",
216 date_str: "1582-10-10",
217 },
218 ];
219
220 for tc in &test_cases {
221 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
222 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
223 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
224
225 test_array::<GenericStringArray<i32>>(tc);
226 test_array::<GenericStringArray<i64>>(tc);
227 test_array::<StringViewArray>(tc);
228 }
229
230 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
231 let to_date_result =
232 invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
233
234 match to_date_result {
235 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
236 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
237 assert_eq!(
238 date_val, expected,
239 "{}: to_date created wrong value",
240 tc.name
241 );
242 }
243 _ => panic!("Could not convert '{}' to Date", tc.date_str),
244 }
245 }
246
247 fn test_array<A>(tc: &TestCase)
248 where
249 A: From<Vec<&'static str>> + Array + 'static,
250 {
251 let date_array = A::from(vec![tc.date_str]);
252 let batch_len = date_array.len();
253 let to_date_result = invoke_to_date_with_args(
254 vec![ColumnarValue::Array(Arc::new(date_array))],
255 batch_len,
256 );
257
258 match to_date_result {
259 Ok(ColumnarValue::Array(a)) => {
260 assert_eq!(a.len(), 1);
261
262 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
263 let mut builder = Date32Array::builder(4);
264 builder.append_value(expected.unwrap());
265
266 assert_eq!(
267 &builder.finish() as &dyn Array,
268 a.as_ref(),
269 "{}: to_date created wrong value",
270 tc.name
271 );
272 }
273 _ => panic!("Could not convert '{}' to Date", tc.date_str),
274 }
275 }
276 }
277
278 #[test]
279 fn test_to_date_with_format() {
280 struct TestCase {
281 name: &'static str,
282 date_str: &'static str,
283 format_str: &'static str,
284 formatted_date: &'static str,
285 }
286
287 let test_cases = vec![
288 TestCase {
289 name: "Largest four-digit year (9999)",
290 date_str: "9999-12-31",
291 format_str: "%Y%m%d",
292 formatted_date: "99991231",
293 },
294 TestCase {
295 name: "Smallest four-digit year (-9999)",
296 date_str: "-9999-12-31",
297 format_str: "%Y/%m/%d",
298 formatted_date: "-9999/12/31",
299 },
300 TestCase {
301 name: "Year 1 (0001)",
302 date_str: "0001-12-31",
303 format_str: "%Y%m%d",
304 formatted_date: "00011231",
305 },
306 TestCase {
307 name: "Year before epoch (1969)",
308 date_str: "1969-01-01",
309 format_str: "%Y%m%d",
310 formatted_date: "19690101",
311 },
312 TestCase {
313 name: "Switch Julian/Gregorian calendar (1582-10-10)",
314 date_str: "1582-10-10",
315 format_str: "%Y%m%d",
316 formatted_date: "15821010",
317 },
318 TestCase {
319 name: "Negative Year, BC (-42-01-01)",
320 date_str: "-42-01-01",
321 format_str: "%Y/%m/%d",
322 formatted_date: "-42/01/01",
323 },
324 ];
325
326 for tc in &test_cases {
327 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
328 test_scalar(
329 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
330 tc,
331 );
332 test_scalar(
333 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
334 tc,
335 );
336
337 test_array::<GenericStringArray<i32>>(tc);
338 test_array::<GenericStringArray<i64>>(tc);
339 test_array::<StringViewArray>(tc);
340 }
341
342 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
343 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
344
345 let to_date_result = invoke_to_date_with_args(
346 vec![
347 ColumnarValue::Scalar(sv),
348 ColumnarValue::Scalar(format_scalar),
349 ],
350 1,
351 );
352
353 match to_date_result {
354 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
355 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
356 assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
357 }
358 _ => panic!(
359 "Could not convert '{}' with format string '{}'to Date",
360 tc.date_str, tc.format_str
361 ),
362 }
363 }
364
365 fn test_array<A>(tc: &TestCase)
366 where
367 A: From<Vec<&'static str>> + Array + 'static,
368 {
369 let date_array = A::from(vec![tc.formatted_date]);
370 let format_array = A::from(vec![tc.format_str]);
371 let batch_len = date_array.len();
372
373 let to_date_result = invoke_to_date_with_args(
374 vec![
375 ColumnarValue::Array(Arc::new(date_array)),
376 ColumnarValue::Array(Arc::new(format_array)),
377 ],
378 batch_len,
379 );
380
381 match to_date_result {
382 Ok(ColumnarValue::Array(a)) => {
383 assert_eq!(a.len(), 1);
384
385 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
386 let mut builder = Date32Array::builder(4);
387 builder.append_value(expected.unwrap());
388
389 assert_eq!(
390 &builder.finish() as &dyn Array, a.as_ref(),
391 "{}: to_date created wrong value for date '{}' with format string '{}'",
392 tc.name,
393 tc.formatted_date,
394 tc.format_str
395 );
396 }
397 _ => panic!(
398 "Could not convert '{}' with format string '{}'to Date: {:?}",
399 tc.formatted_date, tc.format_str, to_date_result
400 ),
401 }
402 }
403 }
404
405 #[test]
406 fn test_to_date_multiple_format_strings() {
407 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
408 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
409 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
410
411 let to_date_result = invoke_to_date_with_args(
412 vec![
413 ColumnarValue::Scalar(formatted_date_scalar),
414 ColumnarValue::Scalar(format1_scalar),
415 ColumnarValue::Scalar(format2_scalar),
416 ],
417 1,
418 );
419
420 match to_date_result {
421 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
422 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
423 assert_eq!(
424 date_val, expected,
425 "to_date created wrong value for date with 2 format strings"
426 );
427 }
428 _ => panic!("Conversion failed",),
429 }
430 }
431
432 #[test]
433 fn test_to_date_from_timestamp() {
434 let test_cases = vec![
435 "2020-09-08T13:42:29Z",
436 "2020-09-08T13:42:29.190855-05:00",
437 "2020-09-08 12:13:29",
438 ];
439 for date_str in test_cases {
440 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
441
442 let to_date_result = invoke_to_date_with_args(
443 vec![ColumnarValue::Scalar(formatted_date_scalar)],
444 1,
445 );
446
447 match to_date_result {
448 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
449 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
450 assert_eq!(date_val, expected, "to_date created wrong value");
451 }
452 _ => panic!("Conversion of {date_str} failed"),
453 }
454 }
455 }
456
457 #[test]
458 fn test_to_date_string_with_valid_number() {
459 let date_str = "20241231";
460 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
461
462 let to_date_result =
463 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
464
465 match to_date_result {
466 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
467 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
468 assert_eq!(
469 date_val, expected,
470 "to_date created wrong value for {date_str}"
471 );
472 }
473 _ => panic!("Conversion of {date_str} failed"),
474 }
475 }
476
477 #[test]
478 fn test_to_date_string_with_invalid_number() {
479 let date_str = "202412311";
480 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
481
482 let to_date_result =
483 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
484
485 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
486 panic!("Conversion of {date_str} succeeded, but should have failed. ");
487 }
488 }
489}