datafusion/test_util/
csv.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Helpers for writing csv files and reading them back
19
20use std::fs::File;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use crate::arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
25use crate::error::Result;
26
27use arrow::csv::WriterBuilder;
28
29///  a CSV file that has been created for testing.
30pub struct TestCsvFile {
31    path: PathBuf,
32    schema: SchemaRef,
33}
34
35impl TestCsvFile {
36    /// Creates a new csv file at the specified location
37    pub fn try_new(
38        path: PathBuf,
39        batches: impl IntoIterator<Item = RecordBatch>,
40    ) -> Result<Self> {
41        let file = File::create(&path).unwrap();
42        let builder = WriterBuilder::new().with_header(true);
43        let mut writer = builder.build(file);
44
45        let mut batches = batches.into_iter();
46        let first_batch = batches.next().expect("need at least one record batch");
47        let schema = first_batch.schema();
48
49        let mut num_rows = 0;
50        for batch in batches {
51            writer.write(&batch)?;
52            num_rows += batch.num_rows();
53        }
54
55        println!("Generated test dataset with {num_rows} rows");
56
57        Ok(Self { path, schema })
58    }
59
60    /// The schema of this csv file
61    pub fn schema(&self) -> SchemaRef {
62        Arc::clone(&self.schema)
63    }
64
65    /// The path to the csv file
66    pub fn path(&self) -> &std::path::Path {
67        self.path.as_path()
68    }
69}