Struct SpillManager

Source

pub struct SpillManager {
    env: Arc<RuntimeEnv>,
    pub(crate) metrics: SpillMetrics,
    schema: SchemaRef,
    batch_read_buffer_capacity: usize,
    pub(crate) compression: SpillCompression,
}

Expand description

The SpillManager is responsible for the following tasks:

Reading and writing RecordBatches to raw files based on the provided configurations.
Updating the associated metrics.

Note: The caller (external operators such as SortExec) is responsible for interpreting the spilled files. For example, a caller may rely on all records within the same spill file being sorted in a specific order.

Fields§

§env: Arc<RuntimeEnv>

§metrics: SpillMetrics

§schema: SchemaRef

§batch_read_buffer_capacity: usize

Number of batches to buffer in memory during disk reads

§compression: SpillCompression

general-purpose compression options

Implementations§

Source §

impl SpillManager

Source

pub fn new( env: Arc<RuntimeEnv>, metrics: SpillMetrics, schema: SchemaRef, ) -> Self

Source

pub fn with_batch_read_buffer_capacity( self, batch_read_buffer_capacity: usize, ) -> Self

Source

pub fn with_compression_type(self, spill_compression: SpillCompression) -> Self

Source

pub fn create_in_progress_file( &self, request_msg: &str, ) -> Result<InProgressSpillFile>

Creates a temporary file for in-progress operations, returning an error if file creation fails. The file can be used to append batches incrementally and then finish the file when done.

Source

pub fn spill_record_batch_and_finish( &self, batches: &[RecordBatch], request_msg: &str, ) -> Result<Option<RefCountedTempFile>>

Spill input batches into a single file in an atomic operation. If it is intended to incrementally write in-memory batches into the same spill file, use Self::create_in_progress_file instead. None is returned if no batches are spilled.

§Errors

Returns an error if spilling would exceed the disk usage limit configured by max_temp_directory_size in DiskManager

Source

pub(crate) fn spill_record_batch_by_size_and_return_max_batch_memory( &self, batch: &RecordBatch, request_description: &str, row_limit: usize, ) -> Result<Option<(RefCountedTempFile, usize)>>

Refer to the documentation for Self::spill_record_batch_and_finish. This method additionally spills the RecordBatch into smaller batches, divided by row_limit.

§Errors

Returns an error if spilling would exceed the disk usage limit configured by max_temp_directory_size in DiskManager

Source

pub(crate) async fn spill_record_batch_stream_and_return_max_batch_memory( &self, stream: &mut SendableRecordBatchStream, request_description: &str, ) -> Result<Option<(RefCountedTempFile, usize)>>

Spill a stream of RecordBatches to disk and return the spill file and the size of the largest batch in memory

Source

pub fn read_spill_as_stream( &self, spill_file_path: RefCountedTempFile, max_record_batch_memory: Option<usize>, ) -> Result<SendableRecordBatchStream>

Reads a spill file as a stream. The file must be created by the current SpillManager. This method will generate output in FIFO order: the batch appended first will be read first.

Trait Implementations§

Source §

impl Clone for SpillManager

Source §

fn clone(&self) -> SpillManager

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for SpillManager

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl !UnwindSafe for SpillManager

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more

§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

§

impl<T> PolicyExt for T
where T: ?Sized,

§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns [Action::Follow] only if self and other return Action::Follow. Read more

§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns [Action::Follow] if either self or other returns Action::Follow. Read more

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more

§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more

§

Struct SpillManager Copy item path

Fields§

Implementations§

impl SpillManager

pub fn new( env: Arc<RuntimeEnv>, metrics: SpillMetrics, schema: SchemaRef, ) -> Self

pub fn with_batch_read_buffer_capacity( self, batch_read_buffer_capacity: usize, ) -> Self

pub fn with_compression_type(self, spill_compression: SpillCompression) -> Self

pub fn create_in_progress_file( &self, request_msg: &str, ) -> Result<InProgressSpillFile>

pub fn spill_record_batch_and_finish( &self, batches: &[RecordBatch], request_msg: &str, ) -> Result<Option<RefCountedTempFile>>

§Errors

pub(crate) fn spill_record_batch_by_size_and_return_max_batch_memory( &self, batch: &RecordBatch, request_description: &str, row_limit: usize, ) -> Result<Option<(RefCountedTempFile, usize)>>

§Errors

pub(crate) async fn spill_record_batch_stream_and_return_max_batch_memory( &self, stream: &mut SendableRecordBatchStream, request_description: &str, ) -> Result<Option<(RefCountedTempFile, usize)>>

pub fn read_spill_as_stream( &self, spill_file_path: RefCountedTempFile, max_record_batch_memory: Option<usize>, ) -> Result<SendableRecordBatchStream>

Trait Implementations§

impl Clone for SpillManager

fn clone(&self) -> SpillManager

fn clone_from(&mut self, source: &Self)

impl Debug for SpillManager

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for SpillManager

impl !RefUnwindSafe for SpillManager

impl Send for SpillManager

impl Sync for SpillManager

impl Unpin for SpillManager

impl !UnwindSafe for SpillManager

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T
where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T> ToOwned for T
where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> ErasedDestructor for T
where T: 'static,

Struct SpillManager

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> ErasedDestructor for T
where T: 'static,