diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 9c0d541f5db..72130233b78 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -27,71 +27,6 @@ use crate::patches::Patches; use crate::stats::ArrayStats; use crate::validity::Validity; -/// An array that partially "patches" another array with new values. -/// -/// # Background -/// -/// This is meant to be the foundation of a fully data-parallel patching strategy, based on the -/// work published in ["G-ALP" from Hepkema et al.](https://ir.cwi.nl/pub/35205/35205.pdf) -/// -/// Patching is common when an encoding almost completely covers an array save a few exceptions. -/// In that case, rather than avoid the encoding entirely, it's preferable to -/// -/// * Replace unencodable values with fillers (zeros, frequent values, nulls, etc.) -/// * Wrap the array with a `PatchedArray` signaling that when the original array is executed, -/// some of the decoded values must be overwritten. -/// -/// In Vortex, the FastLanes bit-packing encoding is often the terminal node in an encoding tree, -/// and FastLanes has an intrinsic chunking of 1024 elements. Thus, 1024 elements is pervasively -/// a useful unit of chunking throughout Vortex, and so we use 1024 as a chunk size here -/// as well. -/// -/// # Details -/// -/// To patch an array, we first divide it into a set of chunks of length 1024, and then within -/// each chunk, we assign each position to a lane. The number of lanes depends on the width of -/// the underlying type. -/// -/// Thus, rather than sorting patch indices and values by their global offset, they are sorted -/// primarily by their chunk, and then subsequently by their lanes. 
-/// -/// The Patched array layout has 4 children -/// -/// * `inner`: the inner array is the one containing encoded values, including the filler values -/// that need to be patched over at execution time -/// * `lane_offsets`: this is an indexing buffer that allows you to see into ranges of the other -/// two children -/// * `indices`: An array of `u16` chunk indices, indicating where within the chunk should the value -/// be overwritten by the patch value -/// * `values`: The child array containing the patch values, which should be inserted over -/// the values of the `inner` at the locations provided by `indices` -/// -/// `indices` and `values` are aligned and accessed together. -/// -/// ```text -/// -/// chunk 0 chunk 0 chunk 0 chunk 0 chunk 0 chunk 0 -/// lane 0 lane 1 lane 2 lane 3 lane 4 lane 5 -/// ┌────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐ -/// lane_offsets │ 0 │ 0 │ 2 │ 2 │ 3 │ 5 │ ... -/// └─────┬──────┴─────┬──────┴─────┬──────┴──────┬─────┴──────┬─────┴──────┬─────┘ -/// │ │ │ │ │ │ -/// │ │ │ │ │ │ -/// ┌─────┴────────────┘ └──────┬──────┘ ┌──────┘ └─────┐ -/// │ │ │ │ -/// │ │ │ │ -/// │ │ │ │ -/// ▼────────────┬────────────┬────────────▼────────────▼────────────┬────────────▼ -/// indices │ │ │ │ │ │ │ -/// │ │ │ │ │ │ │ -/// ├────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ -/// values │ │ │ │ │ │ │ -/// │ │ │ │ │ │ │ -/// └────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘ -/// ``` -/// -/// It turns out that this layout is optimal for executing patching on GPUs, because the -/// `lane_offsets` allows each thread in a warp to seek to its patches in constant time. /// The inner array containing the base unpatched values. pub(super) const INNER_SLOT: usize = 0; /// The lane offsets array for locating patches within lanes. 
diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index 48a5b850c6b..56024f50d8e 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -1,6 +1,72 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! An array that partially "patches" another array with new values. +//! +//! # Background +//! +//! This is meant to be the foundation of a fully data-parallel patching strategy, based on the +//! work published in ["G-ALP" from Hepkema et al.](https://ir.cwi.nl/pub/35205/35205.pdf) +//! +//! Patching is common when an encoding almost completely covers an array save a few exceptions. +//! In that case, rather than avoid the encoding entirely, it's preferable to +//! +//! * Replace unencodable values with fillers (zeros, frequent values, nulls, etc.) +//! * Wrap the array with a `PatchedArray` signaling that when the original array is executed, +//! some of the decoded values must be overwritten. +//! +//! In Vortex, the FastLanes bit-packing encoding is often the terminal node in an encoding tree, +//! and FastLanes has an intrinsic chunking of 1024 elements. Thus, 1024 elements is pervasively +//! a useful unit of chunking throughout Vortex, and so we use 1024 as a chunk size here +//! as well. +//! +//! # Details +//! +//! To patch an array, we first divide it into a set of chunks of length 1024, and then within +//! each chunk, we assign each position to a lane. The number of lanes depends on the width of +//! the underlying type. +//! +//! Thus, rather than sorting patch indices and values by their global offset, they are sorted +//! primarily by their chunk, and then subsequently by their lanes. +//! +//! The Patched array layout has 4 children +//! +//! * `inner`: the inner array is the one containing encoded values, including the filler values +//! that need to be patched over at execution time +//! 
* `lane_offsets`: this is an indexing buffer that allows you to seek into ranges of the other +//! two children +//! * `indices`: An array of `u16` chunk indices, indicating where within the chunk the value +//! should be overwritten by the patch value +//! * `values`: The child array containing the patch values, which should be inserted over +//! the values of the `inner` at the locations provided by `indices` +//! +//! `indices` and `values` are aligned and accessed together. +//! +//! ```text +//! +//! chunk 0 chunk 0 chunk 0 chunk 0 chunk 0 chunk 0 +//! lane 0 lane 1 lane 2 lane 3 lane 4 lane 5 +//! ┌────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐ +//! lane_offsets │ 0 │ 0 │ 2 │ 2 │ 3 │ 5 │ ... +//! └─────┬──────┴─────┬──────┴─────┬──────┴──────┬─────┴──────┬─────┴──────┬─────┘ +//! │ │ │ │ │ │ +//! │ │ │ │ │ │ +//! ┌─────┴────────────┘ └──────┬──────┘ ┌──────┘ └─────┐ +//! │ │ │ │ +//! │ │ │ │ +//! │ │ │ │ +//! ▼────────────┬────────────┬────────────▼────────────▼────────────┬────────────▼ +//! indices │ │ │ │ │ │ │ +//! │ │ │ │ │ │ │ +//! ├────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ +//! values │ │ │ │ │ │ │ +//! │ │ │ │ │ │ │ +//! └────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘ +//! ``` +//! +//! It turns out that this layout is optimal for executing patching on GPUs, because the +//! `lane_offsets` allows each thread in a warp to seek to its patches in constant time. + mod array; mod compute; mod vtable;