1use datafusion_common::config::SqlParserOptions;
24use datafusion_common::DataFusionError;
25use datafusion_common::{sql_err, Diagnostic, Span};
26use sqlparser::ast::{ExprWithAlias, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29 ast::{
30 ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31 Statement as SQLStatement, TableConstraint, Value,
32 },
33 dialect::{keywords::Keyword, Dialect, GenericDialect},
34 parser::{Parser, ParserError},
35 tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
/// Builds an `Err` holding a [`DataFusionError`] converted from a
/// [`ParserError::ParserError`] whose message is `$MSG.to_string()`.
///
/// An optional `; diagnostic = <expr>` tail attaches that value to the error
/// via `with_diagnostic` before the error is wrapped in `Err`.
macro_rules! parser_err {
    ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{

        let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        Err(err)
    }};
}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53 Ok(s.to_uppercase())
54}
55
/// DataFusion's representation of an `EXPLAIN` statement, as built by
/// `DFParser::parse_explain`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExplainStatement {
    /// `true` when the `ANALYZE` keyword was present.
    pub analyze: bool,
    /// `true` when the `VERBOSE` keyword was present.
    pub verbose: bool,
    /// The value given after the `FORMAT` keyword, if any.
    pub format: Option<String>,
    /// The statement being explained.
    pub statement: Box<Statement>,
}
75
76impl fmt::Display for ExplainStatement {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 let Self {
79 analyze,
80 verbose,
81 format,
82 statement,
83 } = self;
84
85 write!(f, "EXPLAIN ")?;
86 if *analyze {
87 write!(f, "ANALYZE ")?;
88 }
89 if *verbose {
90 write!(f, "VERBOSE ")?;
91 }
92 if let Some(format) = format.as_ref() {
93 write!(f, "FORMAT {format} ")?;
94 }
95
96 write!(f, "{statement}")
97 }
98}
99
/// DataFusion's representation of a `COPY ... TO ...` statement, as built by
/// `DFParser::parse_copy`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CopyToStatement {
    /// What is copied: a named relation or a parenthesized query.
    pub source: CopyToSource,
    /// The string literal given after `TO`.
    pub target: String,
    /// Names listed in `PARTITIONED BY (...)`; empty when the clause is absent.
    pub partitioned_by: Vec<String>,
    /// Upper-cased file format from `STORED AS`, if that clause was given.
    pub stored_as: Option<String>,
    /// Key/value pairs from `OPTIONS (...)`; empty when the clause is absent.
    pub options: Vec<(String, Value)>,
}
136
137impl fmt::Display for CopyToStatement {
138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139 let Self {
140 source,
141 target,
142 partitioned_by,
143 stored_as,
144 options,
145 ..
146 } = self;
147
148 write!(f, "COPY {source} TO {target}")?;
149 if let Some(file_type) = stored_as {
150 write!(f, " STORED AS {file_type}")?;
151 }
152 if !partitioned_by.is_empty() {
153 write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154 }
155
156 if !options.is_empty() {
157 let opts: Vec<_> =
158 options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159 write!(f, " OPTIONS ({})", opts.join(", "))?;
160 }
161
162 Ok(())
163 }
164}
165
/// The source of a `COPY` statement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CopyToSource {
    /// A relation referenced by name, e.g. `COPY tbl TO ...`.
    Relation(ObjectName),
    /// A parenthesized query, e.g. `COPY (SELECT ...) TO ...`.
    Query(Box<Query>),
}
173
174impl fmt::Display for CopyToSource {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 CopyToSource::Relation(r) => write!(f, "{r}"),
178 CopyToSource::Query(q) => write!(f, "({q})"),
179 }
180 }
181}
182
/// A list of [`OrderByExpr`]; `CreateExternalTable::order_exprs` stores one
/// of these for every `WITH ORDER (...)` clause that was parsed.
pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
/// DataFusion's representation of a `CREATE EXTERNAL TABLE` statement, as
/// built by `DFParser::parse_create_external_table`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateExternalTable {
    /// Name of the table.
    pub name: ObjectName,
    /// Column definitions from the optional column list; column definitions
    /// given in `PARTITIONED BY (...)` are appended to this list.
    pub columns: Vec<ColumnDef>,
    /// Upper-cased file format from the required `STORED AS` clause.
    pub file_type: String,
    /// String literal from the required `LOCATION` clause.
    pub location: String,
    /// Names of the partition columns; empty when `PARTITIONED BY` is absent.
    pub table_partition_cols: Vec<String>,
    /// One ordering per `WITH ORDER (...)` clause.
    pub order_exprs: Vec<LexOrdering>,
    /// `true` when `IF NOT EXISTS` was present.
    pub if_not_exists: bool,
    /// `true` when the statement started with `CREATE OR REPLACE`.
    pub or_replace: bool,
    /// `true` when `TEMP` or `TEMPORARY` was present.
    pub temporary: bool,
    /// `true` when the `UNBOUNDED` keyword was present.
    pub unbounded: bool,
    /// Key/value pairs from `OPTIONS (...)`; empty when the clause is absent.
    pub options: Vec<(String, Value)>,
    /// Table constraints found in the column list.
    pub constraints: Vec<TableConstraint>,
}
237
238impl fmt::Display for CreateExternalTable {
239 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240 write!(f, "CREATE EXTERNAL TABLE ")?;
241 if self.if_not_exists {
242 write!(f, "IF NOT EXISTS ")?;
243 }
244 write!(f, "{} ", self.name)?;
245 write!(f, "STORED AS {} ", self.file_type)?;
246 if !self.order_exprs.is_empty() {
247 write!(f, "WITH ORDER (")?;
248 let mut first = true;
249 for expr in self.order_exprs.iter().flatten() {
250 if !first {
251 write!(f, ", ")?;
252 }
253 write!(f, "{expr}")?;
254 first = false;
255 }
256 write!(f, ") ")?;
257 }
258 write!(f, "LOCATION {}", self.location)
259 }
260}
261
/// A statement produced by [`DFParser`]: either a statement parsed by
/// sqlparser or one of the statements this module parses itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Statement {
    /// A statement parsed by the underlying sqlparser parser.
    Statement(Box<SQLStatement>),
    /// A `CREATE EXTERNAL TABLE` statement.
    CreateExternalTable(CreateExternalTable),
    /// A `COPY ... TO ...` statement.
    CopyTo(CopyToStatement),
    /// An `EXPLAIN` statement.
    Explain(ExplainStatement),
}
280
281impl fmt::Display for Statement {
282 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
283 match self {
284 Statement::Statement(stmt) => write!(f, "{stmt}"),
285 Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
286 Statement::CopyTo(stmt) => write!(f, "{stmt}"),
287 Statement::Explain(stmt) => write!(f, "{stmt}"),
288 }
289 }
290}
291
292fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
293 if field.is_some() {
294 parser_err!(format!("{name} specified more than once",))?
295 }
296 Ok(())
297}
298
/// SQL parser that handles `CREATE EXTERNAL TABLE`, `COPY ... TO` and
/// `EXPLAIN` itself and hands every other statement to the wrapped
/// sqlparser [`Parser`].
pub struct DFParser<'a> {
    /// The underlying sqlparser parser.
    pub parser: Parser<'a>,
    /// Parser options; `recursion_limit` is included in the message reported
    /// when sqlparser signals that its recursion limit was exceeded.
    options: SqlParserOptions,
}
311
/// Recursion limit used by `DFParserBuilder::new` unless overridden with
/// `with_recursion_limit`.
const DEFAULT_RECURSION_LIMIT: usize = 50;
/// Dialect used by `DFParserBuilder::new` unless overridden with
/// `with_dialect`.
const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
315
/// Builder for [`DFParser`]: collects the SQL text, the dialect and the
/// recursion limit, and creates the parser in `build`.
pub struct DFParserBuilder<'a> {
    /// The SQL text to tokenize and parse.
    sql: &'a str,
    /// The dialect passed to both the tokenizer and the parser.
    dialect: &'a dyn Dialect,
    /// The recursion limit handed to the sqlparser parser.
    recursion_limit: usize,
}
357
358impl<'a> DFParserBuilder<'a> {
359 pub fn new(sql: &'a str) -> Self {
362 Self {
363 sql,
364 dialect: &DEFAULT_DIALECT,
365 recursion_limit: DEFAULT_RECURSION_LIMIT,
366 }
367 }
368
369 pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
371 self.dialect = dialect;
372 self
373 }
374
375 pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
377 self.recursion_limit = recursion_limit;
378 self
379 }
380
381 pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
382 let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
383 let tokens = tokenizer
385 .tokenize_with_location()
386 .map_err(ParserError::from)?;
387
388 Ok(DFParser {
389 parser: Parser::new(self.dialect)
390 .with_tokens_with_locations(tokens)
391 .with_recursion_limit(self.recursion_limit),
392 options: SqlParserOptions {
393 recursion_limit: self.recursion_limit,
394 ..Default::default()
395 },
396 })
397 }
398}
399
400impl<'a> DFParser<'a> {
401 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
402 pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
403 DFParserBuilder::new(sql).build()
404 }
405
406 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
407 pub fn new_with_dialect(
408 sql: &'a str,
409 dialect: &'a dyn Dialect,
410 ) -> Result<Self, DataFusionError> {
411 DFParserBuilder::new(sql).with_dialect(dialect).build()
412 }
413
414 pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
417 let mut parser = DFParserBuilder::new(sql).build()?;
418
419 parser.parse_statements()
420 }
421
422 pub fn parse_sql_with_dialect(
425 sql: &str,
426 dialect: &dyn Dialect,
427 ) -> Result<VecDeque<Statement>, DataFusionError> {
428 let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
429 parser.parse_statements()
430 }
431
432 pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
433 DFParserBuilder::new(sql).build()?.parse_into_expr()
434 }
435
436 pub fn parse_sql_into_expr_with_dialect(
437 sql: &str,
438 dialect: &dyn Dialect,
439 ) -> Result<ExprWithAlias, DataFusionError> {
440 DFParserBuilder::new(sql)
441 .with_dialect(dialect)
442 .build()?
443 .parse_into_expr()
444 }
445
446 pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
448 let mut stmts = VecDeque::new();
449 let mut expecting_statement_delimiter = false;
450 loop {
451 while self.parser.consume_token(&Token::SemiColon) {
453 expecting_statement_delimiter = false;
454 }
455
456 if self.parser.peek_token() == Token::EOF {
457 break;
458 }
459 if expecting_statement_delimiter {
460 return self.expected("end of statement", self.parser.peek_token());
461 }
462
463 let statement = self.parse_statement()?;
464 stmts.push_back(statement);
465 expecting_statement_delimiter = true;
466 }
467 Ok(stmts)
468 }
469
470 fn expected<T>(
472 &self,
473 expected: &str,
474 found: TokenWithSpan,
475 ) -> Result<T, DataFusionError> {
476 let sql_parser_span = found.span;
477 let span = Span::try_from_sqlparser_span(sql_parser_span);
478 let diagnostic = Diagnostic::new_error(
479 format!("Expected: {expected}, found: {found}{}", found.span.start),
480 span,
481 );
482 parser_err!(
483 format!("Expected: {expected}, found: {found}{}", found.span.start);
484 diagnostic=
485 diagnostic
486 )
487 }
488
489 fn expect_token(
490 &mut self,
491 expected: &str,
492 token: Token,
493 ) -> Result<(), DataFusionError> {
494 let next_token = self.parser.peek_token_ref();
495 if next_token.token != token {
496 self.expected(expected, next_token.clone())
497 } else {
498 Ok(())
499 }
500 }
501
502 pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
504 match self.parser.peek_token().token {
505 Token::Word(w) => {
506 match w.keyword {
507 Keyword::CREATE => {
508 self.parser.next_token(); self.parse_create()
510 }
511 Keyword::COPY => {
512 if let Token::Word(w) = self.parser.peek_nth_token(1).token {
513 if w.keyword == Keyword::INTO {
515 return self.parse_and_handle_statement();
516 }
517 }
518 self.parser.next_token(); self.parse_copy()
520 }
521 Keyword::EXPLAIN => {
522 self.parser.next_token(); self.parse_explain()
524 }
525 _ => {
526 self.parse_and_handle_statement()
528 }
529 }
530 }
531 _ => {
532 self.parse_and_handle_statement()
534 }
535 }
536 }
537
538 pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
539 if let Token::Word(w) = self.parser.peek_token().token {
540 match w.keyword {
541 Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
542 return parser_err!("Unsupported command in expression")?;
543 }
544 _ => {}
545 }
546 }
547
548 Ok(self.parser.parse_expr_with_alias()?)
549 }
550
551 pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
556 let expr = self.parse_expr()?;
557 self.expect_token("end of expression", Token::EOF)?;
558 Ok(expr)
559 }
560
561 fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
563 self.parser
564 .parse_statement()
565 .map(|stmt| Statement::Statement(Box::from(stmt)))
566 .map_err(|e| match e {
567 ParserError::RecursionLimitExceeded => DataFusionError::SQL(
568 Box::new(ParserError::RecursionLimitExceeded),
569 Some(format!(
570 " (current limit: {})",
571 self.options.recursion_limit
572 )),
573 ),
574 other => DataFusionError::SQL(Box::new(other), None),
575 })
576 }
577
578 pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
580 let source = if self.parser.consume_token(&Token::LParen) {
582 let query = self.parser.parse_query()?;
583 self.parser.expect_token(&Token::RParen)?;
584 CopyToSource::Query(query)
585 } else {
586 let table_name = self.parser.parse_object_name(true)?;
588 CopyToSource::Relation(table_name)
589 };
590
591 #[derive(Default)]
592 struct Builder {
593 stored_as: Option<String>,
594 target: Option<String>,
595 partitioned_by: Option<Vec<String>>,
596 options: Option<Vec<(String, Value)>>,
597 }
598
599 let mut builder = Builder::default();
600
601 loop {
602 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
603 Keyword::STORED,
604 Keyword::TO,
605 Keyword::PARTITIONED,
606 Keyword::OPTIONS,
607 Keyword::WITH,
608 ]) {
609 match keyword {
610 Keyword::STORED => {
611 self.parser.expect_keyword(Keyword::AS)?;
612 ensure_not_set(&builder.stored_as, "STORED AS")?;
613 builder.stored_as = Some(self.parse_file_format()?);
614 }
615 Keyword::TO => {
616 ensure_not_set(&builder.target, "TO")?;
617 builder.target = Some(self.parser.parse_literal_string()?);
618 }
619 Keyword::WITH => {
620 self.parser.expect_keyword(Keyword::HEADER)?;
621 self.parser.expect_keyword(Keyword::ROW)?;
622 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?;
623 }
624 Keyword::PARTITIONED => {
625 self.parser.expect_keyword(Keyword::BY)?;
626 ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
627 builder.partitioned_by = Some(self.parse_partitions()?);
628 }
629 Keyword::OPTIONS => {
630 ensure_not_set(&builder.options, "OPTIONS")?;
631 builder.options = Some(self.parse_value_options()?);
632 }
633 _ => {
634 unreachable!()
635 }
636 }
637 } else {
638 let token = self.parser.next_token();
639 if token == Token::EOF || token == Token::SemiColon {
640 break;
641 } else {
642 return self.expected("end of statement or ;", token)?;
643 }
644 }
645 }
646
647 let Some(target) = builder.target else {
648 return parser_err!("Missing TO clause in COPY statement")?;
649 };
650
651 Ok(Statement::CopyTo(CopyToStatement {
652 source,
653 target,
654 partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
655 stored_as: builder.stored_as,
656 options: builder.options.unwrap_or(vec![]),
657 }))
658 }
659
660 pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
667 let next_token = self.parser.next_token();
668 match next_token.token {
669 Token::Word(Word { value, .. }) => {
670 let mut parts = vec![value];
671 while self.parser.consume_token(&Token::Period) {
672 let next_token = self.parser.next_token();
673 if let Token::Word(Word { value, .. }) = next_token.token {
674 parts.push(value);
675 } else {
676 return self.expected("key name", next_token);
680 }
681 }
682 Ok(parts.join("."))
683 }
684 Token::SingleQuotedString(s) => Ok(s),
685 Token::DoubleQuotedString(s) => Ok(s),
686 Token::EscapedStringLiteral(s) => Ok(s),
687 _ => self.expected("key name", next_token),
688 }
689 }
690
691 pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
698 let next_token = self.parser.next_token();
699 match next_token.token {
700 Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
702 Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
703 Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
704 Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
705 Token::Number(n, l) => Ok(Value::Number(n, l)),
706 _ => self.expected("string or numeric value", next_token),
707 }
708 }
709
710 pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
712 let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
713 let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
714 let format = self.parse_explain_format()?;
715
716 let statement = self.parse_statement()?;
717
718 Ok(Statement::Explain(ExplainStatement {
719 statement: Box::new(statement),
720 analyze,
721 verbose,
722 format,
723 }))
724 }
725
726 pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
727 if !self.parser.parse_keyword(Keyword::FORMAT) {
728 return Ok(None);
729 }
730
731 let next_token = self.parser.next_token();
732 let format = match next_token.token {
733 Token::Word(w) => Ok(w.value),
734 Token::SingleQuotedString(w) => Ok(w),
735 Token::DoubleQuotedString(w) => Ok(w),
736 _ => self.expected("an explain format such as TREE", next_token),
737 }?;
738 Ok(Some(format))
739 }
740
741 pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
743 if self
745 .parser
746 .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
747 {
748 self.parse_create_external_table(false, true)
749 } else if self.parser.parse_keywords(&[
750 Keyword::OR,
751 Keyword::REPLACE,
752 Keyword::UNBOUNDED,
753 Keyword::EXTERNAL,
754 ]) {
755 self.parse_create_external_table(true, true)
756 } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
757 self.parse_create_external_table(false, false)
758 } else if self
759 .parser
760 .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
761 {
762 self.parse_create_external_table(true, false)
763 } else {
764 Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
765 }
766 }
767
768 fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
769 let mut partitions: Vec<String> = vec![];
770 if !self.parser.consume_token(&Token::LParen)
771 || self.parser.consume_token(&Token::RParen)
772 {
773 return Ok(partitions);
774 }
775
776 loop {
777 if let Token::Word(_) = self.parser.peek_token().token {
778 let identifier = self.parser.parse_identifier()?;
779 partitions.push(identifier.to_string());
780 } else {
781 return self.expected("partition name", self.parser.peek_token());
782 }
783 let comma = self.parser.consume_token(&Token::Comma);
784 if self.parser.consume_token(&Token::RParen) {
785 break;
787 } else if !comma {
788 return self.expected(
789 "',' or ')' after partition definition",
790 self.parser.peek_token(),
791 );
792 }
793 }
794 Ok(partitions)
795 }
796
797 pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
799 let mut values = vec![];
800 self.parser.expect_token(&Token::LParen)?;
801 loop {
802 values.push(self.parse_order_by_expr()?);
803 if !self.parser.consume_token(&Token::Comma) {
804 self.parser.expect_token(&Token::RParen)?;
805 return Ok(values);
806 }
807 }
808 }
809
810 pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
812 let expr = self.parser.parse_expr()?;
813
814 let asc = if self.parser.parse_keyword(Keyword::ASC) {
815 Some(true)
816 } else if self.parser.parse_keyword(Keyword::DESC) {
817 Some(false)
818 } else {
819 None
820 };
821
822 let nulls_first = if self
823 .parser
824 .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
825 {
826 Some(true)
827 } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
828 Some(false)
829 } else {
830 None
831 };
832
833 Ok(OrderByExpr {
834 expr,
835 options: OrderByOptions { asc, nulls_first },
836 with_fill: None,
837 })
838 }
839
840 fn parse_columns(
842 &mut self,
843 ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
844 let mut columns = vec![];
845 let mut constraints = vec![];
846 if !self.parser.consume_token(&Token::LParen)
847 || self.parser.consume_token(&Token::RParen)
848 {
849 return Ok((columns, constraints));
850 }
851
852 loop {
853 if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
854 constraints.push(constraint);
855 } else if let Token::Word(_) = self.parser.peek_token().token {
856 let column_def = self.parse_column_def()?;
857 columns.push(column_def);
858 } else {
859 return self.expected(
860 "column name or constraint definition",
861 self.parser.peek_token(),
862 );
863 }
864 let comma = self.parser.consume_token(&Token::Comma);
865 if self.parser.consume_token(&Token::RParen) {
866 break;
868 } else if !comma {
869 return self.expected(
870 "',' or ')' after column definition",
871 self.parser.peek_token(),
872 );
873 }
874 }
875
876 Ok((columns, constraints))
877 }
878
879 fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
880 let name = self.parser.parse_identifier()?;
881 let data_type = self.parser.parse_data_type()?;
882 let mut options = vec![];
883 loop {
884 if self.parser.parse_keyword(Keyword::CONSTRAINT) {
885 let name = Some(self.parser.parse_identifier()?);
886 if let Some(option) = self.parser.parse_optional_column_option()? {
887 options.push(ColumnOptionDef { name, option });
888 } else {
889 return self.expected(
890 "constraint details after CONSTRAINT <name>",
891 self.parser.peek_token(),
892 );
893 }
894 } else if let Some(option) = self.parser.parse_optional_column_option()? {
895 options.push(ColumnOptionDef { name: None, option });
896 } else {
897 break;
898 };
899 }
900 Ok(ColumnDef {
901 name,
902 data_type,
903 options,
904 })
905 }
906
907 fn parse_create_external_table(
908 &mut self,
909 unbounded: bool,
910 or_replace: bool,
911 ) -> Result<Statement, DataFusionError> {
912 let temporary = self
913 .parser
914 .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
915 .is_some();
916
917 self.parser.expect_keyword(Keyword::TABLE)?;
918 let if_not_exists =
919 self.parser
920 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
921
922 if if_not_exists && or_replace {
923 return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
924 }
925
926 let table_name = self.parser.parse_object_name(true)?;
927 let (mut columns, constraints) = self.parse_columns()?;
928
929 #[derive(Default)]
930 struct Builder {
931 file_type: Option<String>,
932 location: Option<String>,
933 table_partition_cols: Option<Vec<String>>,
934 order_exprs: Vec<LexOrdering>,
935 options: Option<Vec<(String, Value)>>,
936 }
937 let mut builder = Builder::default();
938
939 loop {
940 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
941 Keyword::STORED,
942 Keyword::LOCATION,
943 Keyword::WITH,
944 Keyword::DELIMITER,
945 Keyword::COMPRESSION,
946 Keyword::PARTITIONED,
947 Keyword::OPTIONS,
948 ]) {
949 match keyword {
950 Keyword::STORED => {
951 self.parser.expect_keyword(Keyword::AS)?;
952 ensure_not_set(&builder.file_type, "STORED AS")?;
953 builder.file_type = Some(self.parse_file_format()?);
954 }
955 Keyword::LOCATION => {
956 ensure_not_set(&builder.location, "LOCATION")?;
957 builder.location = Some(self.parser.parse_literal_string()?);
958 }
959 Keyword::WITH => {
960 if self.parser.parse_keyword(Keyword::ORDER) {
961 builder.order_exprs.push(self.parse_order_by_exprs()?);
962 } else {
963 self.parser.expect_keyword(Keyword::HEADER)?;
964 self.parser.expect_keyword(Keyword::ROW)?;
965 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?;
966 }
967 }
968 Keyword::DELIMITER => {
969 return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?;
970 }
971 Keyword::COMPRESSION => {
972 self.parser.expect_keyword(Keyword::TYPE)?;
973 return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?;
974 }
975 Keyword::PARTITIONED => {
976 self.parser.expect_keyword(Keyword::BY)?;
977 ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
978 let peeked = self.parser.peek_nth_token(2);
983 if peeked == Token::Comma || peeked == Token::RParen {
984 builder.table_partition_cols = Some(self.parse_partitions()?)
986 } else {
987 let (cols, cons) = self.parse_columns()?;
989 builder.table_partition_cols = Some(
990 cols.iter().map(|col| col.name.to_string()).collect(),
991 );
992
993 columns.extend(cols);
994
995 if !cons.is_empty() {
996 return sql_err!(ParserError::ParserError(
997 "Constraints on Partition Columns are not supported"
998 .to_string(),
999 ));
1000 }
1001 }
1002 }
1003 Keyword::OPTIONS => {
1004 ensure_not_set(&builder.options, "OPTIONS")?;
1005 builder.options = Some(self.parse_value_options()?);
1006 }
1007 _ => {
1008 unreachable!()
1009 }
1010 }
1011 } else {
1012 let token = self.parser.next_token();
1013 if token == Token::EOF || token == Token::SemiColon {
1014 break;
1015 } else {
1016 return self.expected("end of statement or ;", token)?;
1017 }
1018 }
1019 }
1020
1021 if builder.file_type.is_none() {
1023 return sql_err!(ParserError::ParserError(
1024 "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1025 ));
1026 }
1027 if builder.location.is_none() {
1028 return sql_err!(ParserError::ParserError(
1029 "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1030 ));
1031 }
1032
1033 let create = CreateExternalTable {
1034 name: table_name,
1035 columns,
1036 file_type: builder.file_type.unwrap(),
1037 location: builder.location.unwrap(),
1038 table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1039 order_exprs: builder.order_exprs,
1040 if_not_exists,
1041 or_replace,
1042 temporary,
1043 unbounded,
1044 options: builder.options.unwrap_or(Vec::new()),
1045 constraints,
1046 };
1047 Ok(Statement::CreateExternalTable(create))
1048 }
1049
1050 fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1052 let token = self.parser.next_token();
1053 match &token.token {
1054 Token::Word(w) => parse_file_type(&w.value),
1055 _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
1056 }
1057 }
1058
1059 fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1064 let mut options = vec![];
1065 self.parser.expect_token(&Token::LParen)?;
1066
1067 loop {
1068 let key = self.parse_option_key()?;
1069 let value = self.parse_option_value()?;
1070 options.push((key, value));
1071 let comma = self.parser.consume_token(&Token::Comma);
1072 if self.parser.consume_token(&Token::RParen) {
1073 break;
1075 } else if !comma {
1076 return self.expected(
1077 "',' or ')' after option definition",
1078 self.parser.peek_token(),
1079 );
1080 }
1081 }
1082 Ok(options)
1083 }
1084}
1085
1086#[cfg(test)]
1087mod tests {
1088 use super::*;
1089 use datafusion_common::assert_contains;
1090 use sqlparser::ast::Expr::Identifier;
1091 use sqlparser::ast::{
1092 BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1093 };
1094 use sqlparser::dialect::SnowflakeDialect;
1095 use sqlparser::tokenizer::Span;
1096
1097 fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1098 let statements = DFParser::parse_sql(sql)?;
1099 assert_eq!(
1100 statements.len(),
1101 1,
1102 "Expected to parse exactly one statement"
1103 );
1104 assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1105 Ok(())
1106 }
1107
1108 fn expect_parse_error(sql: &str, expected_error: &str) {
1110 match DFParser::parse_sql(sql) {
1111 Ok(statements) => {
1112 panic!(
1113 "Expected parse error for '{sql}', but was successful: {statements:?}"
1114 );
1115 }
1116 Err(e) => {
1117 let error_message = e.to_string();
1118 assert!(
1119 error_message.contains(expected_error),
1120 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1121 );
1122 }
1123 }
1124 }
1125
1126 fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1127 ColumnDef {
1128 name: Ident {
1129 value: name.into(),
1130 quote_style: None,
1131 span: Span::empty(),
1132 },
1133 data_type,
1134 options: vec![],
1135 }
1136 }
1137
1138 #[test]
1139 fn create_external_table() -> Result<(), DataFusionError> {
1140 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1142 let display = None;
1143 let name = ObjectName::from(vec![Ident::from("t")]);
1144 let expected = Statement::CreateExternalTable(CreateExternalTable {
1145 name: name.clone(),
1146 columns: vec![make_column_def("c1", DataType::Int(display))],
1147 file_type: "CSV".to_string(),
1148 location: "foo.csv".into(),
1149 table_partition_cols: vec![],
1150 order_exprs: vec![],
1151 if_not_exists: false,
1152 or_replace: false,
1153 temporary: false,
1154 unbounded: false,
1155 options: vec![],
1156 constraints: vec![],
1157 });
1158 expect_parse_ok(sql, expected)?;
1159
1160 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
1162 let expected = Statement::CreateExternalTable(CreateExternalTable {
1163 name: name.clone(),
1164 columns: vec![make_column_def("c1", DataType::Int(None))],
1165 file_type: "CSV".to_string(),
1166 location: "foo.csv".into(),
1167 table_partition_cols: vec![],
1168 order_exprs: vec![],
1169 if_not_exists: false,
1170 or_replace: false,
1171 temporary: false,
1172 unbounded: false,
1173 options: vec![],
1174 constraints: vec![],
1175 });
1176 expect_parse_ok(sql, expected)?;
1177
1178 let sql =
1180 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
1181 let expected = Statement::CreateExternalTable(CreateExternalTable {
1182 name: name.clone(),
1183 columns: vec![make_column_def("c1", DataType::Int(None))],
1184 file_type: "CSV".to_string(),
1185 location: "foo.csv".into(),
1186 table_partition_cols: vec![],
1187 order_exprs: vec![],
1188 if_not_exists: false,
1189 or_replace: false,
1190 temporary: false,
1191 unbounded: false,
1192 options: vec![],
1193 constraints: vec![],
1194 });
1195 expect_parse_ok(sql, expected)?;
1196
1197 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1199 let display = None;
1200 let expected = Statement::CreateExternalTable(CreateExternalTable {
1201 name: name.clone(),
1202 columns: vec![make_column_def("c1", DataType::Int(display))],
1203 file_type: "CSV".to_string(),
1204 location: "foo.csv".into(),
1205 table_partition_cols: vec![],
1206 order_exprs: vec![],
1207 if_not_exists: false,
1208 or_replace: false,
1209 temporary: false,
1210 unbounded: false,
1211 options: vec![(
1212 "format.delimiter".into(),
1213 Value::SingleQuotedString("|".into()),
1214 )],
1215 constraints: vec![],
1216 });
1217 expect_parse_ok(sql, expected)?;
1218
1219 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1221 let display = None;
1222 let expected = Statement::CreateExternalTable(CreateExternalTable {
1223 name: name.clone(),
1224 columns: vec![make_column_def("c1", DataType::Int(display))],
1225 file_type: "CSV".to_string(),
1226 location: "foo.csv".into(),
1227 table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1228 order_exprs: vec![],
1229 if_not_exists: false,
1230 or_replace: false,
1231 temporary: false,
1232 unbounded: false,
1233 options: vec![],
1234 constraints: vec![],
1235 });
1236 expect_parse_ok(sql, expected)?;
1237
1238 let sqls =
1240 vec![
1241 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1242 ('format.compression' 'GZIP')", "GZIP"),
1243 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1244 ('format.compression' 'BZIP2')", "BZIP2"),
1245 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1246 ('format.compression' 'XZ')", "XZ"),
1247 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1248 ('format.compression' 'ZSTD')", "ZSTD"),
1249 ];
1250 for (sql, compression) in sqls {
1251 let expected = Statement::CreateExternalTable(CreateExternalTable {
1252 name: name.clone(),
1253 columns: vec![make_column_def("c1", DataType::Int(display))],
1254 file_type: "CSV".to_string(),
1255 location: "foo.csv".into(),
1256 table_partition_cols: vec![],
1257 order_exprs: vec![],
1258 if_not_exists: false,
1259 or_replace: false,
1260 temporary: false,
1261 unbounded: false,
1262 options: vec![(
1263 "format.compression".into(),
1264 Value::SingleQuotedString(compression.into()),
1265 )],
1266 constraints: vec![],
1267 });
1268 expect_parse_ok(sql, expected)?;
1269 }
1270
1271 let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1273 let expected = Statement::CreateExternalTable(CreateExternalTable {
1274 name: name.clone(),
1275 columns: vec![],
1276 file_type: "PARQUET".to_string(),
1277 location: "foo.parquet".into(),
1278 table_partition_cols: vec![],
1279 order_exprs: vec![],
1280 if_not_exists: false,
1281 or_replace: false,
1282 temporary: false,
1283 unbounded: false,
1284 options: vec![],
1285 constraints: vec![],
1286 });
1287 expect_parse_ok(sql, expected)?;
1288
1289 let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1291 let expected = Statement::CreateExternalTable(CreateExternalTable {
1292 name: name.clone(),
1293 columns: vec![],
1294 file_type: "PARQUET".to_string(),
1295 location: "foo.parquet".into(),
1296 table_partition_cols: vec![],
1297 order_exprs: vec![],
1298 if_not_exists: false,
1299 or_replace: false,
1300 temporary: false,
1301 unbounded: false,
1302 options: vec![],
1303 constraints: vec![],
1304 });
1305 expect_parse_ok(sql, expected)?;
1306
1307 let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1309 let expected = Statement::CreateExternalTable(CreateExternalTable {
1310 name: name.clone(),
1311 columns: vec![],
1312 file_type: "AVRO".to_string(),
1313 location: "foo.avro".into(),
1314 table_partition_cols: vec![],
1315 order_exprs: vec![],
1316 if_not_exists: false,
1317 or_replace: false,
1318 temporary: false,
1319 unbounded: false,
1320 options: vec![],
1321 constraints: vec![],
1322 });
1323 expect_parse_ok(sql, expected)?;
1324
1325 let sql =
1327 "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1328 let expected = Statement::CreateExternalTable(CreateExternalTable {
1329 name: name.clone(),
1330 columns: vec![],
1331 file_type: "PARQUET".to_string(),
1332 location: "foo.parquet".into(),
1333 table_partition_cols: vec![],
1334 order_exprs: vec![],
1335 if_not_exists: true,
1336 or_replace: false,
1337 temporary: false,
1338 unbounded: false,
1339 options: vec![],
1340 constraints: vec![],
1341 });
1342 expect_parse_ok(sql, expected)?;
1343
1344 let sql =
1346 "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1347 let expected = Statement::CreateExternalTable(CreateExternalTable {
1348 name: name.clone(),
1349 columns: vec![],
1350 file_type: "PARQUET".to_string(),
1351 location: "foo.parquet".into(),
1352 table_partition_cols: vec![],
1353 order_exprs: vec![],
1354 if_not_exists: false,
1355 or_replace: true,
1356 temporary: false,
1357 unbounded: false,
1358 options: vec![],
1359 constraints: vec![],
1360 });
1361 expect_parse_ok(sql, expected)?;
1362
1363 let sql =
1365 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1366 let expected = Statement::CreateExternalTable(CreateExternalTable {
1367 name: name.clone(),
1368 columns: vec![
1369 make_column_def("c1", DataType::Int(None)),
1370 make_column_def("p1", DataType::Int(None)),
1371 ],
1372 file_type: "CSV".to_string(),
1373 location: "foo.csv".into(),
1374 table_partition_cols: vec!["p1".to_string()],
1375 order_exprs: vec![],
1376 if_not_exists: false,
1377 or_replace: false,
1378 temporary: false,
1379 unbounded: false,
1380 options: vec![],
1381 constraints: vec![],
1382 });
1383 expect_parse_ok(sql, expected)?;
1384
1385 let sql =
1387 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1388 expect_parse_error(
1389 sql,
1390 "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1391 );
1392
1393 let sql =
1395 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1396 expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")");
1397
1398 let sql =
1400 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1401 let expected = Statement::CreateExternalTable(CreateExternalTable {
1402 name: name.clone(),
1403 columns: vec![],
1404 file_type: "X".to_string(),
1405 location: "blahblah".into(),
1406 table_partition_cols: vec![],
1407 order_exprs: vec![],
1408 if_not_exists: false,
1409 or_replace: false,
1410 temporary: false,
1411 unbounded: false,
1412 options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1413 constraints: vec![],
1414 });
1415 expect_parse_ok(sql, expected)?;
1416
1417 let sql =
1419 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1420 let expected = Statement::CreateExternalTable(CreateExternalTable {
1421 name: name.clone(),
1422 columns: vec![],
1423 file_type: "X".to_string(),
1424 location: "blahblah".into(),
1425 table_partition_cols: vec![],
1426 order_exprs: vec![],
1427 if_not_exists: false,
1428 or_replace: false,
1429 temporary: false,
1430 unbounded: false,
1431 options: vec![
1432 ("k1".into(), Value::SingleQuotedString("v1".into())),
1433 ("k2".into(), Value::SingleQuotedString("v2".into())),
1434 ],
1435 constraints: vec![],
1436 });
1437 expect_parse_ok(sql, expected)?;
1438
1439 let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1441 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1442 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1443 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1444 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1445 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1446 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1447 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1448 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1449 let expected = vec![
1450 (None, None),
1451 (None, Some(true)),
1452 (None, Some(false)),
1453 (Some(true), None),
1454 (Some(false), None),
1455 (Some(false), Some(true)),
1456 (Some(false), Some(false)),
1457 (Some(true), Some(true)),
1458 (Some(true), Some(false)),
1459 ];
1460 for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1461 let expected = Statement::CreateExternalTable(CreateExternalTable {
1462 name: name.clone(),
1463 columns: vec![make_column_def("c1", DataType::Int(None))],
1464 file_type: "CSV".to_string(),
1465 location: "foo.csv".into(),
1466 table_partition_cols: vec![],
1467 order_exprs: vec![vec![OrderByExpr {
1468 expr: Identifier(Ident {
1469 value: "c1".to_owned(),
1470 quote_style: None,
1471 span: Span::empty(),
1472 }),
1473 options: OrderByOptions { asc, nulls_first },
1474 with_fill: None,
1475 }]],
1476 if_not_exists: false,
1477 or_replace: false,
1478 temporary: false,
1479 unbounded: false,
1480 options: vec![],
1481 constraints: vec![],
1482 });
1483 expect_parse_ok(sql, expected)?;
1484 }
1485
1486 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1488 let display = None;
1489 let expected = Statement::CreateExternalTable(CreateExternalTable {
1490 name: name.clone(),
1491 columns: vec![
1492 make_column_def("c1", DataType::Int(display)),
1493 make_column_def("c2", DataType::Int(display)),
1494 ],
1495 file_type: "CSV".to_string(),
1496 location: "foo.csv".into(),
1497 table_partition_cols: vec![],
1498 order_exprs: vec![vec![
1499 OrderByExpr {
1500 expr: Identifier(Ident {
1501 value: "c1".to_owned(),
1502 quote_style: None,
1503 span: Span::empty(),
1504 }),
1505 options: OrderByOptions {
1506 asc: Some(true),
1507 nulls_first: None,
1508 },
1509 with_fill: None,
1510 },
1511 OrderByExpr {
1512 expr: Identifier(Ident {
1513 value: "c2".to_owned(),
1514 quote_style: None,
1515 span: Span::empty(),
1516 }),
1517 options: OrderByOptions {
1518 asc: Some(false),
1519 nulls_first: Some(true),
1520 },
1521 with_fill: None,
1522 },
1523 ]],
1524 if_not_exists: false,
1525 or_replace: false,
1526 temporary: false,
1527 unbounded: false,
1528 options: vec![],
1529 constraints: vec![],
1530 });
1531 expect_parse_ok(sql, expected)?;
1532
1533 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1535 let display = None;
1536 let expected = Statement::CreateExternalTable(CreateExternalTable {
1537 name: name.clone(),
1538 columns: vec![
1539 make_column_def("c1", DataType::Int(display)),
1540 make_column_def("c2", DataType::Int(display)),
1541 ],
1542 file_type: "CSV".to_string(),
1543 location: "foo.csv".into(),
1544 table_partition_cols: vec![],
1545 order_exprs: vec![vec![OrderByExpr {
1546 expr: Expr::BinaryOp {
1547 left: Box::new(Identifier(Ident {
1548 value: "c1".to_owned(),
1549 quote_style: None,
1550 span: Span::empty(),
1551 })),
1552 op: BinaryOperator::Minus,
1553 right: Box::new(Identifier(Ident {
1554 value: "c2".to_owned(),
1555 quote_style: None,
1556 span: Span::empty(),
1557 })),
1558 },
1559 options: OrderByOptions {
1560 asc: Some(true),
1561 nulls_first: None,
1562 },
1563 with_fill: None,
1564 }]],
1565 if_not_exists: false,
1566 or_replace: false,
1567 temporary: false,
1568 unbounded: false,
1569 options: vec![],
1570 constraints: vec![],
1571 });
1572 expect_parse_ok(sql, expected)?;
1573
1574 let sql = "
1576 CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1577 STORED AS PARQUET
1578 WITH ORDER (c1 - c2 ASC)
1579 PARTITIONED BY (c1)
1580 LOCATION 'foo.parquet'
1581 OPTIONS ('format.compression' 'zstd',
1582 'format.delimiter' '*',
1583 'ROW_GROUP_SIZE' '1024',
1584 'TRUNCATE' 'NO',
1585 'format.has_header' 'true')";
1586 let expected = Statement::CreateExternalTable(CreateExternalTable {
1587 name: name.clone(),
1588 columns: vec![
1589 make_column_def("c1", DataType::Int(None)),
1590 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1591 ],
1592 file_type: "PARQUET".to_string(),
1593 location: "foo.parquet".into(),
1594 table_partition_cols: vec!["c1".into()],
1595 order_exprs: vec![vec![OrderByExpr {
1596 expr: Expr::BinaryOp {
1597 left: Box::new(Identifier(Ident {
1598 value: "c1".to_owned(),
1599 quote_style: None,
1600 span: Span::empty(),
1601 })),
1602 op: BinaryOperator::Minus,
1603 right: Box::new(Identifier(Ident {
1604 value: "c2".to_owned(),
1605 quote_style: None,
1606 span: Span::empty(),
1607 })),
1608 },
1609 options: OrderByOptions {
1610 asc: Some(true),
1611 nulls_first: None,
1612 },
1613 with_fill: None,
1614 }]],
1615 if_not_exists: true,
1616 or_replace: false,
1617 temporary: false,
1618 unbounded: true,
1619 options: vec![
1620 (
1621 "format.compression".into(),
1622 Value::SingleQuotedString("zstd".into()),
1623 ),
1624 (
1625 "format.delimiter".into(),
1626 Value::SingleQuotedString("*".into()),
1627 ),
1628 (
1629 "ROW_GROUP_SIZE".into(),
1630 Value::SingleQuotedString("1024".into()),
1631 ),
1632 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1633 (
1634 "format.has_header".into(),
1635 Value::SingleQuotedString("true".into()),
1636 ),
1637 ],
1638 constraints: vec![],
1639 });
1640 expect_parse_ok(sql, expected)?;
1641
1642 let sql = "
1644 CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1645 STORED AS PARQUET
1646 WITH ORDER (c1 - c2 ASC)
1647 PARTITIONED BY (c1)
1648 LOCATION 'foo.parquet'
1649 OPTIONS ('format.compression' 'zstd',
1650 'format.delimiter' '*',
1651 'ROW_GROUP_SIZE' '1024',
1652 'TRUNCATE' 'NO',
1653 'format.has_header' 'true')";
1654 let expected = Statement::CreateExternalTable(CreateExternalTable {
1655 name: name.clone(),
1656 columns: vec![
1657 make_column_def("c1", DataType::Int(None)),
1658 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1659 ],
1660 file_type: "PARQUET".to_string(),
1661 location: "foo.parquet".into(),
1662 table_partition_cols: vec!["c1".into()],
1663 order_exprs: vec![vec![OrderByExpr {
1664 expr: Expr::BinaryOp {
1665 left: Box::new(Identifier(Ident {
1666 value: "c1".to_owned(),
1667 quote_style: None,
1668 span: Span::empty(),
1669 })),
1670 op: BinaryOperator::Minus,
1671 right: Box::new(Identifier(Ident {
1672 value: "c2".to_owned(),
1673 quote_style: None,
1674 span: Span::empty(),
1675 })),
1676 },
1677 options: OrderByOptions {
1678 asc: Some(true),
1679 nulls_first: None,
1680 },
1681 with_fill: None,
1682 }]],
1683 if_not_exists: false,
1684 or_replace: true,
1685 temporary: false,
1686 unbounded: true,
1687 options: vec![
1688 (
1689 "format.compression".into(),
1690 Value::SingleQuotedString("zstd".into()),
1691 ),
1692 (
1693 "format.delimiter".into(),
1694 Value::SingleQuotedString("*".into()),
1695 ),
1696 (
1697 "ROW_GROUP_SIZE".into(),
1698 Value::SingleQuotedString("1024".into()),
1699 ),
1700 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1701 (
1702 "format.has_header".into(),
1703 Value::SingleQuotedString("true".into()),
1704 ),
1705 ],
1706 constraints: vec![],
1707 });
1708 expect_parse_ok(sql, expected)?;
1709
1710 Ok(())
1713 }
1714
/// A bare `COPY <relation> TO <target> STORED AS CSV` must parse into a
/// `CopyToStatement` with a relation source, no partition columns and no
/// options. `verified_stmt` additionally checks that the parsed statement
/// prints back to the input text (compared case-insensitively).
#[test]
fn copy_to_table_to_table() -> Result<(), DataFusionError> {
    let parsed = verified_stmt("COPY foo TO bar STORED AS CSV");

    let copy_to = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: Vec::new(),
    };

    assert_eq!(parsed, Statement::CopyTo(copy_to));
    Ok(())
}
1730
/// Parses a Snowflake-style `COPY INTO ...` statement under
/// `SnowflakeDialect` and checks that the result is exactly one statement
/// which is *not* a `Statement::CopyTo`.
///
/// Any parse error is propagated through the returned `Result`.
#[test]
fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
    let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
    // The dialect is only borrowed for the duration of the call, so a plain
    // reference is enough; no heap allocation is needed.
    let statements = DFParser::parse_sql_with_dialect(sql, &SnowflakeDialect)?;

    assert_eq!(
        statements.len(),
        1,
        "Expected to parse exactly one statement"
    );
    assert!(
        !matches!(&statements[0], Statement::CopyTo(_)),
        "Expected non COPY TO statement, but was successful: {statements:?}"
    );
    Ok(())
}
1747
/// `EXPLAIN [ANALYZE] [VERBOSE] COPY ...` must parse into an
/// `ExplainStatement` wrapping the corresponding `CopyToStatement`, with the
/// `analyze` / `verbose` flags reflecting the keywords present in the SQL.
#[test]
fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
    // Each case is (sql, expected `analyze`, expected `verbose`).
    let cases = [
        ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
        (
            "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
            true,
            false,
        ),
        (
            "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
            false,
            true,
        ),
        (
            "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
            true,
            true,
        ),
    ];

    for (sql, analyze, verbose) in cases {
        println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");

        // The statement being explained is the same in every case.
        let inner_copy = CopyToStatement {
            source: object_name("foo"),
            target: String::from("bar"),
            partitioned_by: Vec::new(),
            stored_as: Some(String::from("PARQUET")),
            options: Vec::new(),
        };
        let explain = ExplainStatement {
            analyze,
            verbose,
            format: None,
            statement: Box::new(Statement::CopyTo(inner_copy)),
        };

        assert_eq!(verified_stmt(sql), Statement::Explain(explain));
    }
    Ok(())
}
1788
/// `COPY (<query>) TO ...` must parse into a `CopyToStatement` whose source
/// is `CopyToSource::Query`, holding the same query AST that parsing the
/// inner `SELECT 1` on its own produces.
#[test]
fn copy_to_query_to_table() -> Result<(), DataFusionError> {
    // Parse the inner query by itself to obtain the expected query AST.
    let statement = match verified_stmt("SELECT 1") {
        Statement::Statement(boxed) => *boxed,
        statement => panic!("Expected statement, got {statement:?}"),
    };
    let query = match statement {
        SQLStatement::Query(query) => query,
        statement => panic!("Expected query, got {statement:?}"),
    };

    let header_option = (
        String::from("format.has_header"),
        Value::SingleQuotedString(String::from("true")),
    );
    let copy_to = CopyToStatement {
        source: CopyToSource::Query(query),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![header_option],
    };

    let parsed = verified_stmt(
        "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')",
    );
    assert_eq!(parsed, Statement::CopyTo(copy_to));
    Ok(())
}
1821
/// A single quoted key/value pair in the `OPTIONS (...)` clause of `COPY`
/// must end up in `CopyToStatement::options` as a single-quoted string value.
#[test]
fn copy_to_options() -> Result<(), DataFusionError> {
    let parsed =
        verified_stmt("COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')");

    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let copy_to = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    };

    assert_eq!(parsed, Statement::CopyTo(copy_to));
    Ok(())
}
1838
/// `PARTITIONED BY (a)` on a `COPY` statement must populate
/// `CopyToStatement::partitioned_by`, alongside any `OPTIONS` that follow.
#[test]
fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
    let parsed = verified_stmt(
        "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')",
    );

    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let copy_to = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: vec![String::from("a")],
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    };

    assert_eq!(parsed, Statement::CopyTo(copy_to));
    Ok(())
}
1855
/// Several `OPTIONS` entries with unquoted values (a number, a bare word and
/// a boolean keyword) must be collected, in order, into
/// `CopyToStatement::options`.
///
/// The expected values below show how each unquoted form is represented:
/// the number stays a `Value::Number`, while the bare word and `true` are
/// expected as single-quoted strings.
///
/// Parse errors are propagated through the returned `Result`.
#[test]
fn copy_to_multi_options() -> Result<(), DataFusionError> {
    let sql =
        "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";

    let expected_options = vec![
        (
            "format.row_group_size".to_string(),
            Value::Number("55".to_string(), false),
        ),
        (
            "format.compression".to_string(),
            Value::SingleQuotedString("snappy".to_string()),
        ),
        (
            "execution.keep_partition_by_columns".to_string(),
            Value::SingleQuotedString("true".to_string()),
        ),
    ];

    // This test already returns `Result`, so surface a parse failure as an
    // error instead of panicking on `unwrap`.
    let mut statements = DFParser::parse_sql(sql)?;
    assert_eq!(statements.len(), 1);

    // Report what was actually parsed if it is not a COPY statement.
    let options = match statements.pop_front() {
        Some(Statement::CopyTo(copy_to)) => copy_to.options,
        other => panic!("Expected copy, got {other:?}"),
    };

    assert_eq!(options, expected_options);

    Ok(())
}
1891
1892 fn object_name(name: &str) -> CopyToSource {
1895 CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1896 }
1897
1898 fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1912 let mut statements = DFParser::parse_sql(sql).unwrap();
1913 assert_eq!(statements.len(), 1);
1914
1915 if sql != canonical {
1916 assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1917 }
1918
1919 let only_statement = statements.pop_front().unwrap();
1920 assert_eq!(
1921 canonical.to_uppercase(),
1922 only_statement.to_string().to_uppercase()
1923 );
1924 only_statement
1925 }
1926
1927 fn verified_stmt(sql: &str) -> Statement {
1931 one_statement_parses_to(sql, sql)
1932 }
1933
/// The same query must parse with a parser built without an explicit
/// recursion limit, and must fail with a `RecursionLimitExceeded` error when
/// the builder is given a recursion limit of 1.
#[test]
fn test_recursion_limit() {
    let sql = "SELECT 1 OR 2";

    // No explicit limit configured: parsing is expected to succeed.
    let unrestricted = DFParserBuilder::new(sql)
        .build()
        .unwrap()
        .parse_statements();
    unrestricted.unwrap();

    // With a recursion limit of 1 the same input must be rejected.
    let restricted = DFParserBuilder::new(sql)
        .with_recursion_limit(1)
        .build()
        .unwrap()
        .parse_statements();
    let err = restricted.unwrap_err();

    assert_contains!(
        err.to_string(),
        "SQL error: RecursionLimitExceeded (current limit: 1)"
    );
}
1959
1960 fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
1961 let expr = DFParser::parse_sql_into_expr(sql).unwrap();
1962 assert_eq!(expr, expected, "actual:\n{expr:#?}");
1963 }
1964
1965 fn expect_parse_expr_error(sql: &str, expected_error: &str) {
1967 match DFParser::parse_sql_into_expr(sql) {
1968 Ok(expr) => {
1969 panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
1970 }
1971 Err(e) => {
1972 let error_message = e.to_string();
1973 assert!(
1974 error_message.contains(expected_error),
1975 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1976 );
1977 }
1978 }
1979 }
1980
/// A bare numeric literal parses to a number value expression with no alias.
#[test]
fn literal() {
    let number = Value::Number(String::from("1234"), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: None,
    };

    expect_parse_expr_ok("1234", expected);
}
1994
/// `<literal> as <ident>` parses to the literal expression with the
/// identifier recorded as its alias.
#[test]
fn literal_with_alias() {
    let number = Value::Number(String::from("1234"), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };

    expect_parse_expr_ok("1234 as foo", expected);
}
2008
/// Tokens left over after the alias (here `.bar`) must be rejected with an
/// "end of expression" error rather than being silently ignored.
#[test]
fn literal_with_alias_and_trailing_tokens() {
    let sql = "1234 as foo.bar";
    let expected_error = "Expected: end of expression, found: .";

    expect_parse_expr_error(sql, expected_error);
}
2016
/// Trailing whitespace after the alias is accepted and does not affect the
/// parsed expression or alias.
#[test]
fn literal_with_alias_and_trailing_whitespace() {
    let number = Value::Number(String::from("1234"), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };

    expect_parse_expr_ok("1234 as foo ", expected);
}
2030
/// An extra word after the alias, separated only by whitespace, must be
/// rejected with an "end of expression" error.
#[test]
fn literal_with_alias_and_trailing_whitespace_and_token() {
    let sql = "1234 as foo bar";
    let expected_error = "Expected: end of expression, found: bar";

    expect_parse_expr_error(sql, expected_error);
}
2038}