@@ -41,8 +41,6 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
4141 & integer:: RunEndScheme ,
4242 & integer:: SequenceScheme ,
4343 & rle:: RLE_INTEGER_SCHEME ,
44- #[ cfg( feature = "pco" ) ]
45- & integer:: PcoScheme ,
4644 ////////////////////////////////////////////////////////////////////////////////////////////////
4745 // Float schemes.
4846 ////////////////////////////////////////////////////////////////////////////////////////////////
@@ -52,98 +50,95 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
5250 & float:: FloatDictScheme ,
5351 & float:: NullDominatedSparseScheme ,
5452 & rle:: RLE_FLOAT_SCHEME ,
55- #[ cfg( feature = "pco" ) ]
56- & float:: PcoScheme ,
5753 ////////////////////////////////////////////////////////////////////////////////////////////////
5854 // String schemes.
5955 ////////////////////////////////////////////////////////////////////////////////////////////////
6056 & string:: StringDictScheme ,
6157 & string:: FSSTScheme ,
6258 & string:: StringConstantScheme ,
6359 & string:: NullDominatedSparseScheme ,
64- #[ cfg( feature = "zstd" ) ]
65- & string:: ZstdScheme ,
66- #[ cfg( all( feature = "zstd" , feature = "unstable_encodings" ) ) ]
67- & string:: ZstdBuffersScheme ,
6860 // Decimal schemes.
6961 & decimal:: DecimalScheme ,
7062 // Temporal schemes.
7163 & temporal:: TemporalScheme ,
7264] ;
7365
74- /// Returns the set of scheme IDs excluded by default (behind feature gates or known-expensive).
75- pub fn default_excluded ( ) -> HashSet < SchemeId > {
76- #[ allow( unused_mut, reason = "depends on enabled feature flags" ) ]
77- let mut excluded = HashSet :: new ( ) ;
78- #[ cfg( feature = "pco" ) ]
79- {
80- excluded. insert ( integer:: PcoScheme . id ( ) ) ;
81- excluded. insert ( float:: PcoScheme . id ( ) ) ;
82- }
83- #[ cfg( feature = "zstd" ) ]
84- excluded. insert ( string:: ZstdScheme . id ( ) ) ;
85- #[ cfg( all( feature = "zstd" , feature = "unstable_encodings" ) ) ]
86- excluded. insert ( string:: ZstdBuffersScheme . id ( ) ) ;
87- excluded
88- }
89-
9066/// Builder for creating configured [`BtrBlocksCompressor`] instances.
9167///
92- /// Use this builder to configure which compression schemes are allowed.
93- /// By default, all schemes are enabled except those in [`default_excluded`].
68+ /// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
69+ /// are not in `ALL_SCHEMES` and must be added explicitly via
70+ /// [`with_new_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
71+ /// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
9472///
9573/// # Examples
9674///
9775/// ```rust
9876/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
9977/// use vortex_btrblocks::schemes::integer::IntDictScheme;
10078///
101- /// // Default compressor - all non-excluded schemes allowed .
79+ /// // Default compressor with all schemes in ALL_SCHEMES .
10280/// let compressor = BtrBlocksCompressorBuilder::default().build();
10381///
10482/// // Exclude specific schemes.
10583/// let compressor = BtrBlocksCompressorBuilder::default()
10684/// .exclude([IntDictScheme.id()])
10785/// .build();
108- ///
109- /// // Exclude then re-include.
110- /// let compressor = BtrBlocksCompressorBuilder::default()
111- /// .exclude([IntDictScheme.id()])
112- /// .include([IntDictScheme.id()])
113- /// .build();
11486/// ```
11587#[ derive( Debug , Clone ) ]
11688pub struct BtrBlocksCompressorBuilder {
117- schemes : HashSet < & ' static dyn Scheme > ,
89+ schemes : Vec < & ' static dyn Scheme > ,
11890}
11991
12092impl Default for BtrBlocksCompressorBuilder {
12193 fn default ( ) -> Self {
122- let excluded = default_excluded ( ) ;
12394 Self {
124- schemes : ALL_SCHEMES
125- . iter ( )
126- . copied ( )
127- . filter ( |s| !excluded. contains ( & s. id ( ) ) )
128- . collect ( ) ,
95+ schemes : ALL_SCHEMES . to_vec ( ) ,
12996 }
13097 }
13198}
13299
133100impl BtrBlocksCompressorBuilder {
134- /// Includes the specified compression schemes by their [`SchemeId `].
101+ /// Adds an external compression scheme not in [`ALL_SCHEMES `].
135102 ///
136- /// Only schemes present in [`ALL_SCHEMES`] can be included.
137- pub fn include ( mut self , ids : impl IntoIterator < Item = SchemeId > ) -> Self {
138- let ids: HashSet < _ > = ids. into_iter ( ) . collect ( ) ;
139- for scheme in ALL_SCHEMES {
140- if ids. contains ( & scheme. id ( ) ) {
141- self . schemes . insert ( * scheme) ;
142- }
143- }
103+ /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes with
104+ /// the compressor.
105+ ///
106+ /// # Panics
107+ ///
108+ /// Panics if a scheme with the same [`SchemeId`] is already present.
109+ pub fn with_new_scheme ( mut self , scheme : & ' static dyn Scheme ) -> Self {
110+ assert ! (
111+ !self . schemes. iter( ) . any( |s| s. id( ) == scheme. id( ) ) ,
112+ "scheme {:?} is already present in the builder" ,
113+ scheme. id( ) ,
114+ ) ;
115+
116+ self . schemes . push ( scheme) ;
144117 self
145118 }
146119
120+ /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
121+ ///
122+ /// This provides better compression ratios than the default, especially for floating-point
123+ /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
124+ /// Pco schemes for integers and floats are included.
125+ ///
126+ /// # Panics
127+ ///
128+ /// Panics if any of the compact schemes are already present.
129+ #[ cfg( feature = "zstd" ) ]
130+ pub fn with_compact ( self ) -> Self {
131+ // This should be fast since we don't have that many schemes.
132+ let builder = self . with_new_scheme ( & string:: ZstdScheme ) ;
133+
134+ #[ cfg( feature = "pco" ) ]
135+ let builder = builder
136+ . with_new_scheme ( & integer:: PcoScheme )
137+ . with_new_scheme ( & float:: PcoScheme ) ;
138+
139+ builder
140+ }
141+
147142 /// Excludes the specified compression schemes by their [`SchemeId`].
148143 pub fn exclude ( mut self , ids : impl IntoIterator < Item = SchemeId > ) -> Self {
149144 let ids: HashSet < _ > = ids. into_iter ( ) . collect ( ) ;
@@ -152,15 +147,7 @@ impl BtrBlocksCompressorBuilder {
152147 }
153148
154149 /// Builds the configured [`BtrBlocksCompressor`].
155- ///
156- /// The resulting scheme list preserves the order of [`ALL_SCHEMES`] for deterministic
157- /// tie-breaking.
158150 pub fn build ( self ) -> BtrBlocksCompressor {
159- let schemes = ALL_SCHEMES
160- . iter ( )
161- . copied ( )
162- . filter ( |s| self . schemes . contains ( s) )
163- . collect ( ) ;
164- BtrBlocksCompressor ( CascadingCompressor :: new ( schemes) )
151+ BtrBlocksCompressor ( CascadingCompressor :: new ( self . schemes ) )
165152 }
166153}
0 commit comments