Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
96 commits
Select commit Hold shift + click to select a range
9a1b8a4
feat[turboquant]: add TurboQuant vector quantization encoding
lwwmanning Mar 25, 2026
a888e1b
feat[turboquant]: add TurboQuantCompressor and WriteStrategyBuilder i…
lwwmanning Mar 25, 2026
22b2c26
refactor[turboquant]: integrate into BtrBlocks compressor directly
lwwmanning Mar 25, 2026
4abd910
bench[turboquant]: add compression/decompression throughput benchmarks
lwwmanning Mar 25, 2026
ca0c7ff
perf[turboquant]: replace dense rotation with randomized Hadamard tra…
lwwmanning Mar 25, 2026
303b893
test[turboquant]: add theoretical error bound and inner product bias …
lwwmanning Mar 25, 2026
c4ca3a4
chore[turboquant]: fix review issues and generate public-api.lock
lwwmanning Mar 25, 2026
5d73462
chore[turboquant]: review cleanup — tighter tests, naming, validation
lwwmanning Mar 25, 2026
08e1c14
docs[turboquant]: add crate-level docs with compression ratios and er…
lwwmanning Mar 25, 2026
53805d5
feat[turboquant]: support 1-8 bit quantization
lwwmanning Mar 25, 2026
dbc8f43
feat[turboquant]: support 9-bit Prod for tensor core int8 GEMM
lwwmanning Mar 25, 2026
6b9c0a1
bench[turboquant]: add dim 1024 and 1536 benchmarks
lwwmanning Mar 26, 2026
8a6af98
feat[turboquant]: add rotation sign export/import and hot-path inverse
lwwmanning Mar 26, 2026
67e43f3
feat[turboquant]: define TurboQuantMSEArray and TurboQuantQJLArray
lwwmanning Mar 26, 2026
143dad3
feat[turboquant]: add new compression functions for cascaded arrays
lwwmanning Mar 26, 2026
141b85d
refactor[btrblocks]: simplify TurboQuant compressor for cascaded arrays
lwwmanning Mar 26, 2026
c122fbb
chore[turboquant]: regenerate public-api.lock for new array types
lwwmanning Mar 26, 2026
1946cf4
refactor[turboquant]: restructure into subdirectory modules, delete d…
lwwmanning Mar 26, 2026
8f377d8
test[turboquant]: improve test coverage and add explanatory comments
lwwmanning Mar 26, 2026
f47032b
perf[turboquant]: restore fast SIMD-friendly decode by expanding stor…
lwwmanning Mar 26, 2026
5882ef7
fix[turboquant]: address PR review findings
lwwmanning Mar 27, 2026
3de2430
fix[turboquant]: second-round review fixes and merge conflict resolution
lwwmanning Mar 27, 2026
44aecb1
refactor[turboquant]: simplify code from review findings
lwwmanning Mar 27, 2026
e83aa5f
fix[turboquant]: address PR review comments from AdamGS
lwwmanning Mar 27, 2026
dfc79ef
chore[turboquant]: cleanup from second simplify pass
lwwmanning Mar 27, 2026
727ed1c
chore[turboquant]: address review — hot loop opts, tests, perf TODOs
lwwmanning Mar 28, 2026
acab517
cleanup
lwwmanning Mar 30, 2026
f761bb9
cleanup
lwwmanning Mar 30, 2026
e66b700
cleanup
lwwmanning Mar 30, 2026
feb3033
refactor
lwwmanning Mar 30, 2026
a3a3f53
wip on refactoring
lwwmanning Mar 30, 2026
1f6a3f8
claude fixed my stuff
lwwmanning Mar 30, 2026
86365a3
merge TQ back into single array with option QJL correction
lwwmanning Mar 30, 2026
c1fdffb
wip
lwwmanning Mar 30, 2026
c6a9251
more
lwwmanning Mar 30, 2026
16fa772
samply optimizations
lwwmanning Mar 30, 2026
93b65bf
truncation
lwwmanning Mar 30, 2026
1c08d95
cleanup
lwwmanning Mar 30, 2026
09473e6
share rotation matrix between MSE and QJL
lwwmanning Mar 30, 2026
57a4915
Revert "share rotation matrix between MSE and QJL"
lwwmanning Mar 30, 2026
2b3f085
holy moly simd
lwwmanning Mar 30, 2026
f6f366b
fix review comments
lwwmanning Mar 30, 2026
c3338b0
add turboquant compute and refactor to use FSL children internally
lwwmanning Mar 30, 2026
eb3c7e5
review
lwwmanning Mar 30, 2026
2a9caa0
branchless sign expansion
lwwmanning Mar 31, 2026
600591b
taplo + public-api.lock
lwwmanning Mar 31, 2026
9b76d48
docs
lwwmanning Mar 31, 2026
ad9435e
typos and doctest fixes
lwwmanning Mar 31, 2026
8eec92f
slots
lwwmanning Mar 31, 2026
11d059d
slots2
lwwmanning Mar 31, 2026
290dd62
move stuff around
connortsui20 Mar 20, 2026
10b1ee7
wip on integrating pluggable compressor, moving vortex-turboquant int…
lwwmanning Mar 31, 2026
f9a6637
wip on integrating pluggable compressor, moving vortex-turboquant int…
lwwmanning Mar 31, 2026
330e54e
unstable_encodings for turboquant
lwwmanning Mar 31, 2026
e94b47b
unstable_encodings for benchmarks
lwwmanning Mar 31, 2026
496ddd7
Merge remote-tracking branch 'origin/develop' into claude/admiring-li…
lwwmanning Mar 31, 2026
b57a7f2
max effort review fixes
lwwmanning Mar 31, 2026
a928727
wip on pluggable compressor cleanup
lwwmanning Mar 31, 2026
54b158c
Revert "wip on pluggable compressor cleanup"
lwwmanning Mar 31, 2026
00ee4fe
permutation
lwwmanning Mar 31, 2026
a831042
Revert "permutation"
lwwmanning Mar 31, 2026
2c5017a
Reapply "wip on pluggable compressor cleanup"
lwwmanning Mar 31, 2026
93e5dc7
fixing biases with empirical distribution
lwwmanning Mar 31, 2026
9e17811
clean up pluggable compressing some more
lwwmanning Mar 31, 2026
bd3fc5f
no more empirical distribution
lwwmanning Mar 31, 2026
91e653f
fix[vortex-array]: update an overflow test (#7229)
asubiotto Mar 31, 2026
69a61f1
add ROTATION_STRATEGY.md
lwwmanning Apr 1, 2026
822bd4a
Add compressor for constant nonnullable and all valid bool arrays (#7…
robert3005 Mar 31, 2026
c2dd0c8
chore: have on demand validity and patches for array remove slot extr…
joseph-isaacs Apr 1, 2026
c558ace
buffered strategy to not use eof for the final chunk (#7219)
onursatici Apr 1, 2026
7698bd8
skip[ci]: wait for sccache in actions (#7237)
joseph-isaacs Apr 1, 2026
6d5f832
Remove deprecated compute traits (#7231)
gatesn Apr 1, 2026
9cc3e9e
Fill out a few small pieces in Variant (#7209)
AdamGS Apr 1, 2026
a1d5b71
fix: fix typo in compressor scheme (#7241)
joseph-isaacs Apr 1, 2026
82d26c5
Fix semantic conflict with array slots (#7243)
robert3005 Apr 1, 2026
b3de15b
Support partitionBy in VortexSparkDataSource (#7218)
robert3005 Apr 1, 2026
469d4af
remove deprecated StructStrategy (#7242)
a10y Apr 1, 2026
fec540f
Revert "add ROTATION_STRATEGY.md"
lwwmanning Apr 1, 2026
b67170e
taplo
lwwmanning Apr 1, 2026
cf30f01
dead code
lwwmanning Apr 1, 2026
594b4a3
wire in tq compute
lwwmanning Apr 1, 2026
2bd20ad
Merge remote-tracking branch 'origin/develop' into claude/admiring-li…
lwwmanning Apr 1, 2026
e4c8b9c
DCO Remediation Commit for Will Manning <will@willmanning.io>
lwwmanning Apr 1, 2026
f2eef9a
fix docs
lwwmanning Apr 1, 2026
9195b4a
clean up WriteStrategyBuilder a bit more
lwwmanning Apr 1, 2026
874bca8
Merge remote-tracking branch 'origin/develop' into claude/admiring-li…
lwwmanning Apr 1, 2026
667f087
fixes
lwwmanning Apr 1, 2026
c42076d
review
lwwmanning Apr 1, 2026
691df15
review2
lwwmanning Apr 1, 2026
d6b3031
compressors
lwwmanning Apr 1, 2026
5e56d06
scheme improvements
lwwmanning Apr 1, 2026
a67f19f
fixes
lwwmanning Apr 1, 2026
a29c252
Merge remote-tracking branch 'origin/develop' into claude/admiring-li…
lwwmanning Apr 1, 2026
76e8004
min dimension 3
lwwmanning Apr 1, 2026
546e397
Merge remote-tracking branch 'origin/develop' into claude/admiring-li…
lwwmanning Apr 1, 2026
9e14703
merge
gatesn Apr 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion _typos.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[default]
extend-ignore-identifiers-re = ["ffor", "FFOR", "FoR", "typ", "ratatui"]
extend-ignore-identifiers-re = ["ffor", "FFOR", "FoR", "typ", "ratatui", "wht", "WHT"]
# We support a few common special comments to tell the checker to ignore sections of code
extend-ignore-re = [
"(#|//)\\s*spellchecker:ignore-next-line\\n.*", # Ignore the next line
Expand Down
10 changes: 10 additions & 0 deletions vortex-btrblocks/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -618,10 +618,18 @@ pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude(self, ids: impl cor

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::include(self, ids: impl core::iter::traits::collect::IntoIterator<Item = vortex_compressor::scheme::SchemeId>) -> Self

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::with_scheme(self, scheme: &'static dyn vortex_compressor::scheme::Scheme) -> Self

impl core::clone::Clone for vortex_btrblocks::BtrBlocksCompressorBuilder

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::clone(&self) -> vortex_btrblocks::BtrBlocksCompressorBuilder

impl core::cmp::Eq for vortex_btrblocks::BtrBlocksCompressorBuilder

impl core::cmp::PartialEq for vortex_btrblocks::BtrBlocksCompressorBuilder

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::eq(&self, other: &vortex_btrblocks::BtrBlocksCompressorBuilder) -> bool

impl core::default::Default for vortex_btrblocks::BtrBlocksCompressorBuilder

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::default() -> Self
Expand All @@ -630,6 +638,8 @@ impl core::fmt::Debug for vortex_btrblocks::BtrBlocksCompressorBuilder

pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::marker::StructuralPartialEq for vortex_btrblocks::BtrBlocksCompressorBuilder

pub const vortex_btrblocks::ALL_SCHEMES: &[&dyn vortex_compressor::scheme::Scheme]

pub fn vortex_btrblocks::compress_patches(patches: vortex_array::patches::Patches) -> vortex_error::VortexResult<vortex_array::patches::Patches>
Expand Down
11 changes: 10 additions & 1 deletion vortex-btrblocks/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ pub fn default_excluded() -> HashSet<SchemeId> {
/// .include([IntDictScheme.id()])
/// .build();
/// ```
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BtrBlocksCompressorBuilder {
schemes: HashSet<&'static dyn Scheme>,
}
Expand Down Expand Up @@ -144,6 +144,15 @@ impl BtrBlocksCompressorBuilder {
self
}

/// Registers an additional compression scheme with this builder.
///
/// Intended for schemes that live outside [`ALL_SCHEMES`], e.g. ones defined by
/// encoding crates other than `vortex-btrblocks`. Schemes are kept in a set, so
/// registering a scheme that is already present leaves the builder unchanged.
pub fn with_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
    // `insert` reports whether the scheme was newly added; either outcome is fine here.
    let _newly_added = self.schemes.insert(scheme);
    self
}

/// Excludes the specified compression schemes by their [`SchemeId`].
pub fn exclude(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
let ids: HashSet<_> = ids.into_iter().collect();
Expand Down
1 change: 1 addition & 0 deletions vortex-file/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ vortex-scan = { workspace = true }
vortex-sequence = { workspace = true }
vortex-session = { workspace = true }
vortex-sparse = { workspace = true }
vortex-tensor = { workspace = true }
vortex-utils = { workspace = true, features = ["dashmap"] }
vortex-zigzag = { workspace = true }
vortex-zstd = { workspace = true, optional = true }
Expand Down
2 changes: 2 additions & 0 deletions vortex-file/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,6 @@ pub fn register_default_encodings(session: &mut VortexSession) {
vortex_fastlanes::initialize(session);
vortex_runend::initialize(session);
vortex_sequence::initialize(session);
#[cfg(feature = "unstable_encodings")]
vortex_tensor::encodings::turboquant::initialize(session);
}
102 changes: 78 additions & 24 deletions vortex-file/src/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ use vortex_array::arrays::VarBinView;
use vortex_array::dtype::FieldPath;
use vortex_array::session::ArrayRegistry;
use vortex_array::session::ArraySession;
use vortex_btrblocks::BtrBlocksCompressorBuilder;
use vortex_bytebool::ByteBool;
use vortex_datetime_parts::DateTimeParts;
use vortex_decimal_byte_parts::DecimalByteParts;
use vortex_error::vortex_panic;
use vortex_fastlanes::BitPacked;
use vortex_fastlanes::Delta;
use vortex_fastlanes::FoR;
Expand All @@ -53,13 +55,14 @@ use vortex_pco::Pco;
use vortex_runend::RunEnd;
use vortex_sequence::Sequence;
use vortex_sparse::Sparse;
#[cfg(feature = "unstable_encodings")]
use vortex_tensor::encodings::turboquant::TurboQuant;
use vortex_utils::aliases::hash_map::HashMap;
use vortex_zigzag::ZigZag;

#[rustfmt::skip]
#[cfg(feature = "zstd")]
use vortex_btrblocks::{
BtrBlocksCompressorBuilder,
SchemeExt,
schemes::float,
schemes::integer,
Expand Down Expand Up @@ -111,6 +114,8 @@ pub static ALLOWED_ENCODINGS: LazyLock<ArrayRegistry> = LazyLock::new(|| {
session.register(RunEnd);
session.register(Sequence);
session.register(Sparse);
#[cfg(feature = "unstable_encodings")]
session.register(TurboQuant);
session.register(ZigZag);

#[cfg(feature = "zstd")]
Expand All @@ -127,23 +132,26 @@ pub static ALLOWED_ENCODINGS: LazyLock<ArrayRegistry> = LazyLock::new(|| {
/// repartitioning and compressing them to strike a balance between size on-disk,
/// bulk decoding performance, and IOPS required to perform an indexed read.
pub struct WriteStrategyBuilder {
compressor: Option<Arc<dyn CompressorPlugin>>,
row_block_size: usize,
field_writers: HashMap<FieldPath, Arc<dyn LayoutStrategy>>,
allow_encodings: Option<ArrayRegistry>,
flat_strategy: Option<Arc<dyn LayoutStrategy>>,
// builder and compressor are mutually exclusive
builder: Option<BtrBlocksCompressorBuilder>,
compressor: Option<Arc<dyn CompressorPlugin>>,
}

impl Default for WriteStrategyBuilder {
/// Create a new empty builder. It can be further configured,
/// and then finally built yielding the [`LayoutStrategy`].
fn default() -> Self {
Self {
compressor: None,
row_block_size: 8192,
field_writers: HashMap::new(),
allow_encodings: Some(ALLOWED_ENCODINGS.clone()),
flat_strategy: None,
builder: None,
compressor: None,
}
}
}
Expand All @@ -154,6 +162,9 @@ impl WriteStrategyBuilder {
/// If not provided, this will use a BtrBlocks-style cascading compressor that tries to balance
/// total size with decoding performance.
pub fn with_compressor<C: CompressorPlugin>(mut self, compressor: C) -> Self {
if self.builder.is_some() {
vortex_panic!("Cannot configure both a custom compressor and custom builder schemes");
}
self.compressor = Some(Arc::new(compressor));
self
}
Expand Down Expand Up @@ -198,7 +209,12 @@ impl WriteStrategyBuilder {
/// GPU decompression. Without it, strings use interleaved Zstd compression.
#[cfg(feature = "zstd")]
pub fn with_cuda_compatible_encodings(mut self) -> Self {
let mut builder = BtrBlocksCompressorBuilder::default().exclude([
if self.compressor.is_some() {
vortex_panic!(
"Cannot configure both a custom compressor and CUDA compatible encodings"
);
}
let b = self.builder.take().unwrap_or_default().exclude([
integer::SparseScheme.id(),
integer::RLE_INTEGER_SCHEME.id(),
float::RLE_FLOAT_SCHEME.id(),
Expand All @@ -209,14 +225,13 @@ impl WriteStrategyBuilder {

#[cfg(feature = "unstable_encodings")]
{
builder = builder.include([string::ZstdBuffersScheme.id()]);
self.builder = Some(b.include([string::ZstdBuffersScheme.id()]));
}
#[cfg(not(feature = "unstable_encodings"))]
{
builder = builder.include([string::ZstdScheme.id()]);
self.builder = Some(b.include([string::ZstdScheme.id()]));
}

self.compressor = Some(Arc::new(builder.build()));
self
}

Expand All @@ -227,21 +242,47 @@ impl WriteStrategyBuilder {
/// especially for floating-point heavy datasets.
#[cfg(feature = "zstd")]
pub fn with_compact_encodings(mut self) -> Self {
let btrblocks = BtrBlocksCompressorBuilder::default()
.include([
string::ZstdScheme.id(),
integer::PcoScheme.id(),
float::PcoScheme.id(),
])
.build();

self.compressor = Some(Arc::new(btrblocks));
if self.compressor.is_some() {
vortex_panic!("Cannot configure both a custom compressor and compact encodings");
}
self.builder = Some(self.builder.take().unwrap_or_default().include([
string::ZstdScheme.id(),
integer::PcoScheme.id(),
float::PcoScheme.id(),
]));
self
}

/// Enable TurboQuant lossy vector quantization for tensor columns.
///
/// When enabled, `Vector` and `FixedShapeTensor` extension arrays are
/// compressed using the TurboQuant algorithm with QJL correction for
/// unbiased inner product estimation.
///
/// This augments any scheme configuration already applied to this builder
/// (for example by `with_compact_encodings`) rather than replacing it. If no
/// scheme configuration exists yet, a default [`BtrBlocksCompressorBuilder`]
/// is created and the TurboQuant scheme is added to it.
///
/// # Panics
///
/// Panics if a custom compressor was already installed via `with_compressor`:
/// an opaque compressor cannot be extended with additional schemes, so the two
/// options are mutually exclusive.
#[cfg(feature = "unstable_encodings")]
pub fn with_vector_quantization(mut self) -> Self {
    use vortex_tensor::encodings::turboquant::scheme::TURBOQUANT_SCHEME;

    if self.compressor.is_some() {
        vortex_panic!("Cannot configure both a custom compressor and vector quantization");
    }

    // Start from whatever schemes were configured so far, or the defaults if none.
    let builder = self.builder.take().unwrap_or_default();
    self.builder = Some(builder.with_scheme(&TURBOQUANT_SCHEME));
    self
}

/// Builds the canonical [`LayoutStrategy`] implementation, with the configured overrides
/// applied.
pub fn build(self) -> Arc<dyn LayoutStrategy> {
use vortex_btrblocks::SchemeExt as _;
use vortex_btrblocks::schemes::integer::IntDictScheme;

let flat: Arc<dyn LayoutStrategy> = if let Some(flat) = self.flat_strategy {
flat
} else if let Some(allow_encodings) = self.allow_encodings {
Expand All @@ -254,12 +295,24 @@ impl WriteStrategyBuilder {
let chunked = ChunkedLayoutStrategy::new(flat.clone());
// 6. buffer chunks so they end up with closer segment ids physically
let buffered = BufferedStrategy::new(chunked, 2 * ONE_MEG); // 2MB

// 5. compress each chunk
let compressing = if let Some(ref compressor) = self.compressor {
CompressingStrategy::new_opaque(buffered, compressor.clone())
} else {
CompressingStrategy::new_btrblocks(buffered, true)
};
let data_compressor: Arc<dyn CompressorPlugin> =
if let Some(ref compressor) = self.compressor {
assert!(
self.builder.is_none(),
"Cannot configure both a custom compressor and custom builder schemes"
);
compressor.clone()
} else {
Arc::new(
self.builder
.unwrap_or_default()
.exclude([IntDictScheme.id()])
.build(),
)
};
let compressing = CompressingStrategy::new(buffered, data_compressor.clone());

// 4. prior to compression, coalesce up to a minimum size
let coalescing = RepartitionStrategy::new(
Expand All @@ -279,11 +332,12 @@ impl WriteStrategyBuilder {
);

// 2.1. | 3.1. compress stats tables and dict values.
let compress_then_flat = if let Some(ref compressor) = self.compressor {
CompressingStrategy::new_opaque(flat, compressor.clone())
let stats_compressor = if let Some(compressor) = self.compressor {
compressor.clone()
} else {
CompressingStrategy::new_btrblocks(flat, false)
Arc::new(BtrBlocksCompressorBuilder::default().build())
};
let compress_then_flat = CompressingStrategy::new(flat, stats_compressor);

// 3. apply dict encoding or fallback
let dict = DictStrategy::new(
Expand Down
5 changes: 3 additions & 2 deletions vortex-file/tests/test_write_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use vortex_array::field_path;
use vortex_array::scalar_fn::session::ScalarFnSession;
use vortex_array::session::ArraySession;
use vortex_array::validity::Validity;
use vortex_btrblocks::BtrBlocksCompressor;
use vortex_buffer::ByteBuffer;
use vortex_file::OpenOptionsSessionExt;
use vortex_file::WriteOptionsSessionExt;
Expand Down Expand Up @@ -67,9 +68,9 @@ async fn test_file_roundtrip() {

// Create a writer which by default uses the BtrBlocks compressor for a.compressed, but leaves
// the b and the a.raw columns uncompressed.
let default_strategy = Arc::new(CompressingStrategy::new_btrblocks(
let default_strategy = Arc::new(CompressingStrategy::new(
FlatLayoutStrategy::default(),
false,
BtrBlocksCompressor::default(),
));

let writer = Arc::new(
Expand Down
4 changes: 1 addition & 3 deletions vortex-layout/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,7 @@ pub struct vortex_layout::layouts::compressed::CompressingStrategy

impl vortex_layout::layouts::compressed::CompressingStrategy

pub fn vortex_layout::layouts::compressed::CompressingStrategy::new_btrblocks<S: vortex_layout::LayoutStrategy>(child: S, exclude_int_dict_encoding: bool) -> Self

pub fn vortex_layout::layouts::compressed::CompressingStrategy::new_opaque<S: vortex_layout::LayoutStrategy, C: vortex_layout::layouts::compressed::CompressorPlugin>(child: S, compressor: C) -> Self
pub fn vortex_layout::layouts::compressed::CompressingStrategy::new<S: vortex_layout::LayoutStrategy, C: vortex_layout::layouts::compressed::CompressorPlugin>(child: S, compressor: C) -> Self

pub fn vortex_layout::layouts::compressed::CompressingStrategy::with_concurrency(self, concurrency: usize) -> Self

Expand Down
Loading
Loading