Skip to content

Commit b573853

Browse files
authored
Clean up btrblocks compressor interface (#7274)
## Summary We had a very strange interface for the `BtrBlocksCompressor` and builder previously. However, now that the compressor is fully pluggable, we can make it a lot nicer. ## API Changes Reduces the API (and really the semantics) of the `BtrBlocksCompressor` to just a default with a methods that allow us to add new (pluggable) schemes and exclude schemes by ID. And then for compact encodings there's just a simple helper function that wraps those `with_compact`. ## Testing The interface changes, but none of the logic changed. Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 886aefc commit b573853

File tree

9 files changed

+144
-140
lines changed

9 files changed

+144
-140
lines changed

fuzz/src/array/mod.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,6 @@ use vortex_array::search_sorted::SearchSorted;
6060
use vortex_array::search_sorted::SearchSortedSide;
6161
use vortex_btrblocks::BtrBlocksCompressor;
6262
use vortex_btrblocks::BtrBlocksCompressorBuilder;
63-
use vortex_btrblocks::SchemeExt;
64-
use vortex_btrblocks::schemes::float;
65-
use vortex_btrblocks::schemes::integer;
66-
use vortex_btrblocks::schemes::string;
6763
use vortex_error::VortexExpect;
6864
use vortex_error::vortex_panic;
6965
use vortex_mask::Mask;
@@ -546,11 +542,7 @@ pub fn compress_array(array: &ArrayRef, strategy: CompressorStrategy) -> ArrayRe
546542
.compress(array)
547543
.vortex_expect("BtrBlocksCompressor compress should succeed in fuzz test"),
548544
CompressorStrategy::Compact => BtrBlocksCompressorBuilder::default()
549-
.include([
550-
string::ZstdScheme.id(),
551-
integer::PcoScheme.id(),
552-
float::PcoScheme.id(),
553-
])
545+
.with_compact()
554546
.build()
555547
.compress(array)
556548
.vortex_expect("Compact compress should succeed in fuzz test"),

vortex-btrblocks/public-api.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,9 @@ pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::build(self) -> vortex_btrbl
616616

617617
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude(self, ids: impl core::iter::traits::collect::IntoIterator<Item = vortex_compressor::scheme::SchemeId>) -> Self
618618

619-
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::include(self, ids: impl core::iter::traits::collect::IntoIterator<Item = vortex_compressor::scheme::SchemeId>) -> Self
619+
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::with_compact(self) -> Self
620+
621+
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::with_new_scheme(self, scheme: &'static dyn vortex_compressor::scheme::Scheme) -> Self
620622

621623
impl core::clone::Clone for vortex_btrblocks::BtrBlocksCompressorBuilder
622624

@@ -633,5 +635,3 @@ pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::fmt(&self, f: &mut core::fm
633635
pub const vortex_btrblocks::ALL_SCHEMES: &[&dyn vortex_compressor::scheme::Scheme]
634636

635637
pub fn vortex_btrblocks::compress_patches(patches: vortex_array::patches::Patches) -> vortex_error::VortexResult<vortex_array::patches::Patches>
636-
637-
pub fn vortex_btrblocks::default_excluded() -> vortex_utils::aliases::hash_set::HashSet<vortex_compressor::scheme::SchemeId>

vortex-btrblocks/src/builder.rs

Lines changed: 45 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
4141
&integer::RunEndScheme,
4242
&integer::SequenceScheme,
4343
&rle::RLE_INTEGER_SCHEME,
44-
#[cfg(feature = "pco")]
45-
&integer::PcoScheme,
4644
////////////////////////////////////////////////////////////////////////////////////////////////
4745
// Float schemes.
4846
////////////////////////////////////////////////////////////////////////////////////////////////
@@ -52,98 +50,95 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
5250
&float::FloatDictScheme,
5351
&float::NullDominatedSparseScheme,
5452
&rle::RLE_FLOAT_SCHEME,
55-
#[cfg(feature = "pco")]
56-
&float::PcoScheme,
5753
////////////////////////////////////////////////////////////////////////////////////////////////
5854
// String schemes.
5955
////////////////////////////////////////////////////////////////////////////////////////////////
6056
&string::StringDictScheme,
6157
&string::FSSTScheme,
6258
&string::StringConstantScheme,
6359
&string::NullDominatedSparseScheme,
64-
#[cfg(feature = "zstd")]
65-
&string::ZstdScheme,
66-
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
67-
&string::ZstdBuffersScheme,
6860
// Decimal schemes.
6961
&decimal::DecimalScheme,
7062
// Temporal schemes.
7163
&temporal::TemporalScheme,
7264
];
7365

74-
/// Returns the set of scheme IDs excluded by default (behind feature gates or known-expensive).
75-
pub fn default_excluded() -> HashSet<SchemeId> {
76-
#[allow(unused_mut, reason = "depends on enabled feature flags")]
77-
let mut excluded = HashSet::new();
78-
#[cfg(feature = "pco")]
79-
{
80-
excluded.insert(integer::PcoScheme.id());
81-
excluded.insert(float::PcoScheme.id());
82-
}
83-
#[cfg(feature = "zstd")]
84-
excluded.insert(string::ZstdScheme.id());
85-
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
86-
excluded.insert(string::ZstdBuffersScheme.id());
87-
excluded
88-
}
89-
9066
/// Builder for creating configured [`BtrBlocksCompressor`] instances.
9167
///
92-
/// Use this builder to configure which compression schemes are allowed.
93-
/// By default, all schemes are enabled except those in [`default_excluded`].
68+
/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
69+
/// are not in `ALL_SCHEMES` and must be added explicitly via
70+
/// [`with_new_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
71+
/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
9472
///
9573
/// # Examples
9674
///
9775
/// ```rust
9876
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
9977
/// use vortex_btrblocks::schemes::integer::IntDictScheme;
10078
///
101-
/// // Default compressor - all non-excluded schemes allowed.
79+
/// // Default compressor with all schemes in ALL_SCHEMES.
10280
/// let compressor = BtrBlocksCompressorBuilder::default().build();
10381
///
10482
/// // Exclude specific schemes.
10583
/// let compressor = BtrBlocksCompressorBuilder::default()
10684
/// .exclude([IntDictScheme.id()])
10785
/// .build();
108-
///
109-
/// // Exclude then re-include.
110-
/// let compressor = BtrBlocksCompressorBuilder::default()
111-
/// .exclude([IntDictScheme.id()])
112-
/// .include([IntDictScheme.id()])
113-
/// .build();
11486
/// ```
11587
#[derive(Debug, Clone)]
11688
pub struct BtrBlocksCompressorBuilder {
117-
schemes: HashSet<&'static dyn Scheme>,
89+
schemes: Vec<&'static dyn Scheme>,
11890
}
11991

12092
impl Default for BtrBlocksCompressorBuilder {
12193
fn default() -> Self {
122-
let excluded = default_excluded();
12394
Self {
124-
schemes: ALL_SCHEMES
125-
.iter()
126-
.copied()
127-
.filter(|s| !excluded.contains(&s.id()))
128-
.collect(),
95+
schemes: ALL_SCHEMES.to_vec(),
12996
}
13097
}
13198
}
13299

133100
impl BtrBlocksCompressorBuilder {
134-
/// Includes the specified compression schemes by their [`SchemeId`].
101+
/// Adds an external compression scheme not in [`ALL_SCHEMES`].
135102
///
136-
/// Only schemes present in [`ALL_SCHEMES`] can be included.
137-
pub fn include(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
138-
let ids: HashSet<_> = ids.into_iter().collect();
139-
for scheme in ALL_SCHEMES {
140-
if ids.contains(&scheme.id()) {
141-
self.schemes.insert(*scheme);
142-
}
143-
}
103+
/// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes with
104+
/// the compressor.
105+
///
106+
/// # Panics
107+
///
108+
/// Panics if a scheme with the same [`SchemeId`] is already present.
109+
pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
110+
assert!(
111+
!self.schemes.iter().any(|s| s.id() == scheme.id()),
112+
"scheme {:?} is already present in the builder",
113+
scheme.id(),
114+
);
115+
116+
self.schemes.push(scheme);
144117
self
145118
}
146119

120+
/// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
121+
///
122+
/// This provides better compression ratios than the default, especially for floating-point
123+
/// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
124+
/// Pco schemes for integers and floats are included.
125+
///
126+
/// # Panics
127+
///
128+
/// Panics if any of the compact schemes are already present.
129+
#[cfg(feature = "zstd")]
130+
pub fn with_compact(self) -> Self {
131+
// This should be fast since we don't have that many schemes.
132+
let builder = self.with_new_scheme(&string::ZstdScheme);
133+
134+
#[cfg(feature = "pco")]
135+
let builder = builder
136+
.with_new_scheme(&integer::PcoScheme)
137+
.with_new_scheme(&float::PcoScheme);
138+
139+
builder
140+
}
141+
147142
/// Excludes the specified compression schemes by their [`SchemeId`].
148143
pub fn exclude(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
149144
let ids: HashSet<_> = ids.into_iter().collect();
@@ -152,15 +147,7 @@ impl BtrBlocksCompressorBuilder {
152147
}
153148

154149
/// Builds the configured [`BtrBlocksCompressor`].
155-
///
156-
/// The resulting scheme list preserves the order of [`ALL_SCHEMES`] for deterministic
157-
/// tie-breaking.
158150
pub fn build(self) -> BtrBlocksCompressor {
159-
let schemes = ALL_SCHEMES
160-
.iter()
161-
.copied()
162-
.filter(|s| self.schemes.contains(s))
163-
.collect();
164-
BtrBlocksCompressor(CascadingCompressor::new(schemes))
151+
BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
165152
}
166153
}

vortex-btrblocks/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ pub mod schemes;
6363
// Btrblocks-specific exports.
6464
pub use builder::ALL_SCHEMES;
6565
pub use builder::BtrBlocksCompressorBuilder;
66-
pub use builder::default_excluded;
6766
pub use canonical_compressor::BtrBlocksCompressor;
6867
pub use schemes::patches::compress_patches;
6968
pub use vortex_compressor::CascadingCompressor;

0 commit comments

Comments
 (0)