datafusion/test_util/csv.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Helpers for writing csv files and reading them back
19
20use std::fs::File;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use crate::arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
25use crate::error::Result;
26
27use arrow::csv::WriterBuilder;
28
29/// a CSV file that has been created for testing.
30pub struct TestCsvFile {
31 path: PathBuf,
32 schema: SchemaRef,
33}
34
35impl TestCsvFile {
36 /// Creates a new csv file at the specified location
37 pub fn try_new(
38 path: PathBuf,
39 batches: impl IntoIterator<Item = RecordBatch>,
40 ) -> Result<Self> {
41 let file = File::create(&path).unwrap();
42 let builder = WriterBuilder::new().with_header(true);
43 let mut writer = builder.build(file);
44
45 let mut batches = batches.into_iter();
46 let first_batch = batches.next().expect("need at least one record batch");
47 let schema = first_batch.schema();
48
49 let mut num_rows = 0;
50 for batch in batches {
51 writer.write(&batch)?;
52 num_rows += batch.num_rows();
53 }
54
55 println!("Generated test dataset with {num_rows} rows");
56
57 Ok(Self { path, schema })
58 }
59
60 /// The schema of this csv file
61 pub fn schema(&self) -> SchemaRef {
62 Arc::clone(&self.schema)
63 }
64
65 /// The path to the csv file
66 pub fn path(&self) -> &std::path::Path {
67 self.path.as_path()
68 }
69}