@@ -24,10 +24,10 @@ pub use schema::TypeInfo;
2424
2525use crate :: naming;
2626use crate :: parser;
27- use crate :: plot:: aesthetic:: { is_positional_aesthetic, AestheticContext } ;
27+ use crate :: plot:: aesthetic:: is_positional_aesthetic;
2828use crate :: plot:: facet:: { resolve_properties as resolve_facet_properties, FacetDataContext } ;
2929use crate :: plot:: layer:: is_transposed;
30- use crate :: plot:: { AestheticValue , Layer , Scale , ScaleTypeKind , Schema } ;
30+ use crate :: plot:: { AestheticValue , Layer , ScaleTypeKind , Schema } ;
3131use crate :: { DataFrame , DataSource , GgsqlError , Plot , Result } ;
3232use std:: collections:: { HashMap , HashSet } ;
3333
@@ -656,41 +656,21 @@ fn resolve_facet(
656656}
657657
658658// =============================================================================
659- // Discrete Column Handling
659+ // Mapped Column Grouping
660660// =============================================================================
661661
662- /// Add discrete mapped columns to partition_by for all layers
662+ /// Add all non-positional mapped columns to partition_by for all layers.
663663///
664- /// For each layer, examines all aesthetic mappings and adds any that map to
665- /// discrete columns to the layer's partition_by. This ensures proper grouping
666- /// for all layers, not just stat geoms.
667- ///
668- /// Discreteness is determined by:
669- /// 1. If the aesthetic has an explicit scale with a scale_type:
670- /// - ScaleTypeKind::Discrete or Binned → discrete (add to partition_by)
671- /// - ScaleTypeKind::Continuous → not discrete (skip)
672- /// - ScaleTypeKind::Identity → fall back to schema
673- /// 2. Otherwise, use schema's is_discrete flag (based on column data type)
664+ /// This ensures mapped columns survive stat transforms (GROUP BY), matching
665+ /// ggplot2's behavior where all mapped aesthetics contribute to grouping
666+ /// regardless of data type. Without this, integer columns mapped to
667+ /// fill/color/etc. would be dropped by stat transforms like bar's COUNT.
674668///
675669/// Columns already in partition_by (from explicit PARTITION BY clause) are skipped.
676670/// Stat-consumed aesthetics (x for bar, x for histogram) are also skipped.
677- fn add_discrete_columns_to_partition_by (
678- layers : & mut [ Layer ] ,
679- layer_schemas : & [ Schema ] ,
680- scales : & [ Scale ] ,
681- aesthetic_ctx : & AestheticContext ,
682- ) {
683- // Build a map of aesthetic -> scale for quick lookup
684- let scale_map: HashMap < & str , & Scale > =
685- scales. iter ( ) . map ( |s| ( s. aesthetic . as_str ( ) , s) ) . collect ( ) ;
686-
671+ fn add_mapped_columns_to_partition_by ( layers : & mut [ Layer ] , layer_schemas : & [ Schema ] ) {
687672 for ( layer, schema) in layers. iter_mut ( ) . zip ( layer_schemas. iter ( ) ) {
688673 let schema_columns: HashSet < & str > = schema. iter ( ) . map ( |c| c. name . as_str ( ) ) . collect ( ) ;
689- let discrete_columns: HashSet < & str > = schema
690- . iter ( )
691- . filter ( |c| c. is_discrete )
692- . map ( |c| c. name . as_str ( ) )
693- . collect ( ) ;
694674
695675 // Build set of excluded aesthetics that should not trigger auto-grouping:
696676 // - Stat-consumed aesthetics (transformed, not grouped)
@@ -718,40 +698,11 @@ fn add_discrete_columns_to_partition_by(
718698 continue ;
719699 }
720700
721- // Determine if this aesthetic is discrete:
722- // 1. Check if there's an explicit scale with a scale_type
723- // 2. Fall back to schema's is_discrete
724- //
725- // Discrete and Binned scales produce categorical groupings.
726- // Continuous scales don't group. Identity defers to column type.
727- let primary_aes = aesthetic_ctx
728- . primary_internal_positional ( aesthetic)
729- . unwrap_or ( aesthetic) ;
730- let is_discrete = if let Some ( scale) = scale_map. get ( primary_aes) {
731- if let Some ( ref scale_type) = scale. scale_type {
732- match scale_type. scale_type_kind ( ) {
733- ScaleTypeKind :: Discrete
734- | ScaleTypeKind :: Binned
735- | ScaleTypeKind :: Ordinal => true ,
736- ScaleTypeKind :: Continuous => false ,
737- ScaleTypeKind :: Identity => discrete_columns. contains ( col) ,
738- }
739- } else {
740- // Scale exists but no explicit type - use schema
741- discrete_columns. contains ( col)
742- }
743- } else {
744- // No scale for this aesthetic - use schema
745- discrete_columns. contains ( col)
746- } ;
747-
748- // Skip if not discrete
749- if !is_discrete {
750- continue ;
751- }
752-
753- // Use the prefixed aesthetic column name, since the query renames
754- // columns to prefixed names (e.g., island → __ggsql_aes_fill__)
701+ // Add all non-positional, non-consumed mapped columns to partition_by.
702+ // This ensures they survive stat transforms (GROUP BY), matching ggplot2
703+ // where all mapped aesthetics contribute to grouping regardless of
704+ // data type. Without this, integer columns mapped to fill/color/etc.
705+ // would be dropped by stat transforms like bar's COUNT.
755706 let aes_col_name = naming:: aesthetic_column ( aesthetic) ;
756707
757708 // Skip if already in partition_by
@@ -1083,15 +1034,9 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result<Pr
10831034 // This must happen before apply_layer_transforms so pre_stat_transform_sql has resolved breaks
10841035 scale:: apply_pre_stat_resolve ( & mut specs[ 0 ] , & layer_schemas) ?;
10851036
1086- // Add discrete mapped columns to partition_by for all layers
1087- let scales = specs[ 0 ] . scales . clone ( ) ;
1088- let aesthetic_ctx = specs[ 0 ] . get_aesthetic_context ( ) ;
1089- add_discrete_columns_to_partition_by (
1090- & mut specs[ 0 ] . layers ,
1091- & layer_schemas,
1092- & scales,
1093- & aesthetic_ctx,
1094- ) ;
1037+ // Add all non-positional mapped columns to partition_by so they survive
1038+ // stat transforms (GROUP BY), matching ggplot2's grouping behavior.
1039+ add_mapped_columns_to_partition_by ( & mut specs[ 0 ] . layers , & layer_schemas) ;
10951040
10961041 // Clone scales for apply_layer_transforms
10971042 let scales = specs[ 0 ] . scales . clone ( ) ;
@@ -1519,6 +1464,59 @@ mod tests {
15191464 assert ! ( layer_df. height( ) < 100 ) ;
15201465 }
15211466
1467+ #[ cfg( feature = "duckdb" ) ]
1468+ #[ test]
1469+ fn test_bar_with_integer_fill_column ( ) {
1470+ // Integer columns mapped to non-positional aesthetics like fill should
1471+ // survive stat transforms (be included in GROUP BY), matching ggplot2
1472+ // behavior where all mapped aesthetics contribute to grouping.
1473+ let reader = DuckDBReader :: from_connection_string ( "duckdb://memory" ) . unwrap ( ) ;
1474+
1475+ reader
1476+ . connection ( )
1477+ . execute (
1478+ "CREATE TABLE int_fill_test AS SELECT * FROM (VALUES
1479+ ('Male', 0), ('Male', 1), ('Female', 0), ('Female', 1),
1480+ ('Male', 0), ('Male', 0), ('Female', 1), ('Female', 1)
1481+ ) AS t(sex, survived)" ,
1482+ duckdb:: params![ ] ,
1483+ )
1484+ . unwrap ( ) ;
1485+
1486+ let query = r#"
1487+ SELECT * FROM int_fill_test
1488+ VISUALISE
1489+ DRAW bar MAPPING sex AS x, survived AS fill
1490+ "# ;
1491+
1492+ let result = prepare_data_with_reader ( query, & reader) ;
1493+ assert ! (
1494+ result. is_ok( ) ,
1495+ "Integer column mapped to fill should not cause error: {:?}" ,
1496+ result. err( )
1497+ ) ;
1498+
1499+ let result = result. unwrap ( ) ;
1500+ let layer_df = result. data . get ( & naming:: layer_key ( 0 ) ) . unwrap ( ) ;
1501+
1502+ // Should have 4 rows: Male/0, Male/1, Female/0, Female/1
1503+ assert_eq ! ( layer_df. height( ) , 4 ) ;
1504+
1505+ // fill column should be present
1506+ let fill_col = naming:: aesthetic_column ( "fill" ) ;
1507+ let col_names: Vec < String > = layer_df
1508+ . get_column_names_str ( )
1509+ . iter ( )
1510+ . map ( |s| s. to_string ( ) )
1511+ . collect ( ) ;
1512+ assert ! (
1513+ col_names. contains( & fill_col) ,
1514+ "Expected fill column '{}' in {:?}" ,
1515+ fill_col,
1516+ col_names
1517+ ) ;
1518+ }
1519+
15221520 #[ cfg( feature = "duckdb" ) ]
15231521 #[ test]
15241522 fn test_bar_count_stat_transform ( ) {
0 commit comments