From a9cababcfc1dafe7e39d9187a6ee7c108df7730e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 16:51:45 +0100 Subject: [PATCH 01/26] Add PLACE clause for annotation layers Introduces PLACE as a new top-level clause for creating annotation layers with literal values, similar to ggplot2's annotate() function. Changes: - Grammar: Add place_clause accepting geom and optional SETTING - DataSource: Add Annotation variant to mark annotation layers - Builder: Handle place_clause and set DataSource::Annotation - Execution: Add stub for annotation data source generation - Tests: Add parser test for PLACE clause PLACE layers use DataSource::Annotation as a marker and don't inherit global mappings. All aesthetic values come from SETTING parameters. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 5 +++++ src/parser/builder.rs | 6 +++++ src/parser/mod.rs | 43 ++++++++++++++++++++++++++++++++++++ src/plot/types.rs | 8 +++++++ tree-sitter-ggsql/grammar.js | 9 ++++++++ 5 files changed, 71 insertions(+) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index de370d86..c3f35851 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -223,6 +223,11 @@ pub fn determine_layer_source( Some(DataSource::FilePath(path)) => { format!("'{}'", path) } + Some(DataSource::Annotation) => { + // TODO: Implement annotation layer data source generation + // For now, generate a dummy single-row table + "(SELECT 1 AS __ggsql_dummy__)".to_string() + } None => { // Layer uses global data - caller must ensure has_global is true debug_assert!(has_global, "Layer has no source and no global data"); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index f7bc9cb1..a4760bd1 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -305,6 +305,12 @@ fn process_viz_clause(node: &Node, source: &SourceTree, spec: &mut Plot) -> Resu let layer = build_layer(&child, source)?; spec.layers.push(layer); } + "place_clause" => { + let mut layer = build_layer(&child, source)?; + // Mark as annotation layer with dummy data source + layer.source = Some(DataSource::Annotation); + spec.layers.push(layer); + } "scale_clause" => { let scale = build_scale(&child, source)?; spec.scales.push(scale); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6c80d63a..7efa5c35 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -413,4 +413,47 @@ mod tests { Some(DataSource::Identifier("cte".to_string())) ); } + + #[test] + fn test_place_clause() { + let query = r#" + SELECT x, y FROM data + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Hello' + "#; + + let result = parse_query(query); + assert!(result.is_ok(), "Failed to parse PLACE clause: {:?}", result); + + let specs = result.unwrap(); + assert_eq!(specs.len(), 1); + assert_eq!(specs[0].layers.len(), 2, "Expected 2 layers (DRAW + PLACE)"); + + // First layer: regular DRAW point + assert_eq!(specs[0].layers[0].geom, Geom::point()); + assert!(specs[0].layers[0].source.is_none(), "DRAW layer should have no explicit source"); + + // Second layer: PLACE text with annotation source + assert_eq!(specs[0].layers[1].geom, Geom::text()); + assert_eq!( + specs[0].layers[1].source, + Some(DataSource::Annotation), + "PLACE layer should have Annotation source" + ); + + // Verify SETTING parameters are preserved + assert_eq!( + specs[0].layers[1].parameters.get("x"), + Some(&ParameterValue::Number(5.0)) + ); + assert_eq!( + specs[0].layers[1].parameters.get("y"), + Some(&ParameterValue::Number(10.0)) + ); + assert_eq!( + specs[0].layers[1].parameters.get("label"), + Some(&ParameterValue::String("Hello".to_string())) + ); + } } diff --git a/src/plot/types.rs b/src/plot/types.rs index 95bfc2d6..4414ff7e 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,6 +140,8 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), + /// Annotation layer (PLACE clause) - generates dummy single-row source + Annotation, } impl DataSource { @@ -148,6 +150,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, + DataSource::Annotation => "__annotation__", } } @@ -155,6 +158,11 @@ impl DataSource { pub fn is_file(&self) -> bool { matches!(self, DataSource::FilePath(_)) } + + /// Returns true if this is an annotation layer source + pub fn is_annotation(&self) -> bool { + matches!(self, DataSource::Annotation) + } } // ============================================================================= diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 9302f195..dc1a8487 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -442,6 +442,7 @@ module.exports = grammar({ // All the visualization clauses (same as current grammar) viz_clause: $ => choice( $.draw_clause, + $.place_clause, $.scale_clause, $.facet_clause, $.project_clause, @@ -461,6 +462,14 @@ module.exports = grammar({ optional($.order_clause) ), + // PLACE clause - syntax: PLACE geom [SETTING ...] + // For annotation layers with literal values only (no data mappings) + place_clause: $ => seq( + caseInsensitive('PLACE'), + $.geom_type, + optional($.setting_clause) + ), + // REMAPPING clause: maps stat-computed columns to aesthetics // Syntax: REMAPPING count AS y, sum AS size // Reuses mapping_list for parsing - stat names are treated as column references From 214b53a065e11775f7df5c4fc72ae729a063905e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 17:12:24 +0100 Subject: [PATCH 02/26] Implement execution for PLACE annotation layers Completes the execution pipeline for PLACE layers: Changes: - Skip annotation layers in merge_global_mappings_into_layers() to prevent inheritance of global mappings from VISUALISE - Clarify annotation data source comment: 1-row dummy satisfies execution pipeline requirements (schema detection, type resolution) even though SETTING literals render as Vega-Lite datum values Annotation layers now properly isolated from global mappings while still working with the existing execution infrastructure. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 7 +++++-- src/execute/mod.rs | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index c3f35851..f2639203 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -224,8 +224,11 @@ pub fn determine_layer_source( format!("'{}'", path) } Some(DataSource::Annotation) => { - // TODO: Implement annotation layer data source generation - // For now, generate a dummy single-row table + // Annotation layer - generate a single-row dummy table. + // The execution pipeline expects all layers to have a DataFrame, even though + // SETTING literals eventually render as Vega-Lite datum values ({"value": ...}) + // that don't reference the data. The 1-row dummy satisfies schema detection, + // type resolution, and other intermediate steps that expect data to exist. "(SELECT 1 AS __ggsql_dummy__)".to_string() } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index a8f17e07..ac74982c 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -26,7 +26,7 @@ use crate::parser; use crate::plot::aesthetic::{is_positional_aesthetic, AestheticContext}; use crate::plot::facet::{resolve_properties as resolve_facet_properties, FacetDataContext}; use crate::plot::{AestheticValue, Layer, Scale, ScaleTypeKind, Schema}; -use crate::{DataFrame, GgsqlError, Plot, Result}; +use crate::{DataFrame, DataSource, GgsqlError, Plot, Result}; use std::collections::{HashMap, HashSet}; use crate::reader::Reader; @@ -153,6 +153,11 @@ fn validate(layers: &[Layer], layer_schemas: &[Schema]) -> Result<()> { fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema]) { for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { + // Skip annotation layers - they don't inherit global mappings + if matches!(layer.source, Some(DataSource::Annotation)) { + continue; + } + let supported = layer.geom.aesthetics().supported(); let schema_columns: HashSet<&str> = schema.iter().map(|c| c.name.as_str()).collect(); From cada5bdc3a6427436cbc9a14c92ec5e75de23c14 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 11:21:08 +0100 Subject: [PATCH 03/26] Transform position aesthetics for PLACE annotation layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enables annotation layers to participate in scale training and coordinate transformation pipeline by moving positional/required aesthetics from SETTING parameters to mappings. Changes: - Transform parameter keys to internal space (x → pos1, y → pos2) - Move positional and required aesthetics from parameters to mappings for annotation layers in transform_aesthetics_to_internal() - Refactor place_clause handling in builder.rs into build_place_layer() - Add comprehensive test coverage: parser, execution, validation, and rendering tests verifying field vs value encoding Annotation position aesthetics now materialize as columns, enabling them to affect scale ranges and be properly encoded in Vega-Lite output as field encodings (not datum values). Co-Authored-By: Claude Sonnet 4.5 --- src/execute/mod.rs | 207 ++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 65 +++++++++++++ src/parser/builder.rs | 24 ++++- src/parser/mod.rs | 20 ++-- src/plot/main.rs | 55 ++++++++++- 5 files changed, 359 insertions(+), 12 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index ac74982c..cad3ffe8 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1230,6 +1230,7 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result 2, y => 25, label => 'Annotation', size => 14 + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + assert_eq!(result.specs.len(), 1); + assert_eq!(result.specs[0].layers.len(), 2, "Should have DRAW + PLACE layers"); + + // First layer: regular DRAW point + let point_layer = &result.specs[0].layers[0]; + assert_eq!(point_layer.geom, crate::Geom::point()); + assert!(point_layer.source.is_none(), "DRAW layer should have no explicit source"); + + // Second layer: PLACE text annotation + let annotation_layer = &result.specs[0].layers[1]; + assert_eq!(annotation_layer.geom, crate::Geom::text()); + assert_eq!( + annotation_layer.source, + Some(DataSource::Annotation), + "PLACE layer should have Annotation source" + ); + + // Verify annotation layer has 1-row data + let annotation_key = annotation_layer.data_key.as_ref().unwrap(); + let annotation_df = result.data.get(annotation_key).unwrap(); + assert_eq!( + annotation_df.height(), + 1, + "Annotation layer should have exactly 1 row" + ); + + // Verify positional aesthetics are moved from SETTING to mappings with transformed names + // They become Column references (not Literals) so they can participate in scale training + assert!( + matches!( + annotation_layer.mappings.get("pos1"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos1__" + ), + "x should be transformed to pos1, moved to mappings, and materialized as column" + ); + assert!( + matches!( + annotation_layer.mappings.get("pos2"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos2__" + ), + "y should be transformed to pos2, moved to mappings, and materialized as column" + ); + + // Verify non-positional aesthetics are in mappings as Literals (via resolve_aesthetics) + assert!( + matches!( + annotation_layer.mappings.get("label"), + Some(AestheticValue::Literal(ParameterValue::String(s))) if s == "Annotation" + ), + "label should be in mappings as Literal" + ); + assert!( + matches!( + annotation_layer.mappings.get("size"), + Some(AestheticValue::Literal(ParameterValue::Number(n))) if *n == 14.0 + ), + "size should be in mappings as Literal" + ); + + // Parameters should be empty after resolve_aesthetics moves them to mappings + assert!( + annotation_layer.parameters.is_empty(), + "All SETTING parameters should be moved to mappings" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_missing_required_aesthetic() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, label => 'Missing y!' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_err(), "Should fail validation"); + + match result { + Err(GgsqlError::ValidationError(msg)) => { + assert!( + msg.contains("pos2"), + "Error should mention missing pos2 aesthetic: {}", + msg + ); + } + Err(e) => panic!("Expected ValidationError, got: {}", e), + Ok(_) => panic!("Expected error, got success"), + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_affects_scale_ranges() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Data has x from 1-3, but PLACE annotation at x=10 should extend the range + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL + SELECT 2 AS x, 20 AS y UNION ALL + SELECT 3 AS x, 30 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 10, y => 50, label => 'Extended' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_ok(), "Query should execute: {:?}", result.err()); + + let prep = result.unwrap(); + + // Check that x scale input_range includes both data range (1-3) and annotation point (10) + let x_scale = prep.specs[0].find_scale("pos1"); + assert!(x_scale.is_some(), "Should have x scale"); + + let scale = x_scale.unwrap(); + if let Some(input_range) = &scale.input_range { + // Input range should include the annotation x value (10) + // The scale input_range is resolved from the combined data + annotation DataFrames + assert!( + input_range.len() >= 2, + "Scale input_range should have min/max values" + ); + // Check that the range max is at least 10 (the annotation x value) + if let Some(max_val) = input_range.last() { + match max_val { + crate::plot::types::ArrayElement::Number(n) => { + assert!( + *n >= 10.0, + "Scale input_range max should include annotation point at x=10, got: {}", + n + ); + } + _ => panic!("Expected numeric input_range value"), + } + } + } else { + panic!("Scale should have an input_range"); + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_no_global_mapping_inheritance() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y, 'red' AS color + VISUALISE x, y, color + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Test' + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // DRAW layer should have inherited global color mapping + let point_layer = &result.specs[0].layers[0]; + assert!( + point_layer.mappings.contains_key("color") || + point_layer.mappings.contains_key("fill") || + point_layer.mappings.contains_key("stroke"), + "DRAW layer should inherit color from global mappings" + ); + + // PLACE layer should NOT have inherited global mappings + let annotation_layer = &result.specs[0].layers[1]; + assert!( + !annotation_layer.mappings.contains_key("color"), + "PLACE layer should not inherit color from global mappings" + ); + assert!( + !annotation_layer.mappings.contains_key("fill"), + "PLACE layer should not inherit fill from global mappings" + ); + assert!( + !annotation_layer.mappings.contains_key("stroke"), + "PLACE layer should not inherit stroke from global mappings" + ); + } } diff --git a/src/lib.rs b/src/lib.rs index 6e3b8074..0363aa59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -758,6 +758,71 @@ mod integration_tests { ); } + #[test] + fn test_end_to_end_place_field_vs_value_encoding() { + // Test that PLACE annotation layers render correctly: + // - Positional aesthetics (x, y) as field encodings (reference columns) + // - Non-positional aesthetics (size, stroke) as value encodings (datum values) + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL SELECT 2 AS x, 20 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, y => 30, label => 'Annotation', size => 16, stroke => 'red' + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + + // Render to Vega-Lite + let writer = VegaLiteWriter::new(); + let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Find the text layer (should be second layer) + let text_layer = &vl_spec["layer"][1]; + + // Mark can be either a string or an object with type + let mark_type = if text_layer["mark"].is_string() { + text_layer["mark"].as_str().unwrap() + } else { + text_layer["mark"]["type"].as_str().unwrap() + }; + assert_eq!(mark_type, "text", "Second layer should be text"); + + let encoding = &text_layer["encoding"]; + + // Positional aesthetics should be field encodings (have "field" key) + assert!( + encoding["x"]["field"].is_string(), + "x should be a field encoding: {:?}", + encoding["x"] + ); + assert!( + encoding["y"]["field"].is_string(), + "y should be a field encoding: {:?}", + encoding["y"] + ); + + // Non-positional aesthetics should be value encodings (have "value" key) + // Note: size may be scaled/transformed, so just check it's present as a value + assert!( + encoding["size"]["value"].is_number(), + "size should be a value encoding with numeric value: {:?}", + encoding["size"] + ); + + // Note: stroke color goes through resolve_aesthetics and may be in different location + // Just verify it's present somewhere as a literal + let text_mark = &text_layer["mark"]; + let has_stroke_value = encoding.get("stroke") + .and_then(|s| s.get("value")) + .is_some() + || text_mark.get("stroke").is_some(); + assert!(has_stroke_value, "stroke should be present as a value"); + } + #[test] fn test_end_to_end_global_constant_in_visualise() { // Test that global constants in VISUALISE clause work correctly diff --git a/src/parser/builder.rs b/src/parser/builder.rs index a4760bd1..5fe74b5c 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -293,6 +293,10 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // since geom definitions use internal names for their supported/required aesthetics spec.transform_aesthetics_to_internal(); + // Process annotation layers: move positional/required parameters to mappings + // This must happen AFTER transform_aesthetics_to_internal() so parameter keys are in internal space + spec.process_annotation_layers(); + Ok(spec) } @@ -306,9 +310,7 @@ fn process_viz_clause(node: &Node, source: &SourceTree, spec: &mut Plot) -> Resu spec.layers.push(layer); } "place_clause" => { - let mut layer = build_layer(&child, source)?; - // Mark as annotation layer with dummy data source - layer.source = Some(DataSource::Annotation); + let layer = build_place_layer(&child, source)?; spec.layers.push(layer); } "scale_clause" => { @@ -495,6 +497,22 @@ fn build_layer(node: &Node, source: &SourceTree) -> Result { Ok(layer) } +/// Build an annotation Layer from a place_clause node +/// This is similar to build_layer but marks it as an annotation layer. +/// The transformation of positional/required aesthetics from SETTING to mappings +/// happens later in Plot::transform_aesthetics_to_internal(). +/// Syntax: PLACE geom [MAPPING col AS x, ...] [SETTING param => val, ...] [FILTER condition] +fn build_place_layer(node: &Node, source: &SourceTree) -> Result { + // Build the layer using standard logic + let mut layer = build_layer(node, source)?; + + // Mark as annotation layer with dummy data source + // This triggers special handling in transform_aesthetics_to_internal() + layer.source = Some(DataSource::Annotation); + + Ok(layer) +} + /// Parse a setting_clause: SETTING param => value, ... fn parse_setting_clause( node: &Node, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7efa5c35..385ff4b9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -442,18 +442,22 @@ mod tests { "PLACE layer should have Annotation source" ); - // Verify SETTING parameters are preserved - assert_eq!( - specs[0].layers[1].parameters.get("x"), - Some(&ParameterValue::Number(5.0)) + // Verify positional aesthetics moved to mappings with internal names + // (transform_aesthetics_to_internal runs as part of parse_query) + assert!( + specs[0].layers[1].mappings.contains_key("pos1"), + "x should be transformed to pos1 and moved to mappings" ); - assert_eq!( - specs[0].layers[1].parameters.get("y"), - Some(&ParameterValue::Number(10.0)) + assert!( + specs[0].layers[1].mappings.contains_key("pos2"), + "y should be transformed to pos2 and moved to mappings" ); + + // Verify non-positional parameter (label) stays in parameters (with internal name = same) assert_eq!( specs[0].layers[1].parameters.get("label"), - Some(&ParameterValue::String("Hello".to_string())) + Some(&ParameterValue::String("Hello".to_string())), + "Non-positional parameters remain in parameters" ); } } diff --git a/src/plot/main.rs b/src/plot/main.rs index a211ac34..9cf9bc65 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -168,6 +168,7 @@ impl Plot { /// - Global mappings /// - Layer aesthetics /// - Layer remappings + /// - Layer parameters (SETTING aesthetics) /// - Scale aesthetics pub fn transform_aesthetics_to_internal(&mut self) { let ctx = self.get_aesthetic_context(); @@ -175,10 +176,21 @@ impl Plot { // Transform global mappings self.global_mappings.transform_to_internal(&ctx); - // Transform layer aesthetics and remappings + // Transform layer aesthetics, remappings, and parameters for layer in &mut self.layers { layer.mappings.transform_to_internal(&ctx); layer.remappings.transform_to_internal(&ctx); + + // Transform parameter keys from user-facing to internal names + // This ensures position aesthetics in SETTING (e.g., x => 5) become internal (pos1 => 5) + let original_parameters = std::mem::take(&mut layer.parameters); + for (param_name, value) in original_parameters { + let internal_name = ctx + .map_user_to_internal(¶m_name) + .map(|s| s.to_string()) + .unwrap_or(param_name); + layer.parameters.insert(internal_name, value); + } } // Transform scale aesthetics @@ -189,6 +201,47 @@ impl Plot { } } + /// Process annotation layers by moving positional and required aesthetics + /// from parameters to mappings. + /// + /// This must be called AFTER transform_aesthetics_to_internal() so that + /// parameter keys are already in internal space (pos1, pos2, etc.). + /// + /// Positional aesthetics need to be in mappings so they go through the same + /// transformation pipeline (internal→user space) as data-mapped aesthetics, + /// enabling them to participate in scale training and coordinate transformations. + pub fn process_annotation_layers(&mut self) { + for layer in &mut self.layers { + if !matches!(layer.source, Some(crate::DataSource::Annotation)) { + continue; + } + + let required_aesthetics = layer.geom.aesthetics().required(); + // Collect parameter keys first to avoid borrow checker issues + let param_keys: Vec = layer.parameters.keys().cloned().collect(); + + for param_name in param_keys { + // Check if this is a positional aesthetic OR a required aesthetic + let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); + let is_required = required_aesthetics.contains(¶m_name.as_str()); + + if is_positional || is_required { + // Skip if already in mappings + if layer.mappings.contains_key(¶m_name) { + continue; + } + + // Move from parameters to mappings (both now use internal names) + if let Some(value) = layer.parameters.remove(¶m_name) { + layer + .mappings + .insert(¶m_name, crate::AestheticValue::Literal(value)); + } + } + } + } + } + /// Check if the spec has any layers pub fn has_layers(&self) -> bool { !self.layers.is_empty() From d541c34f53124d38d5c86f77f62c5c39eb54f029 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 12:26:45 +0100 Subject: [PATCH 04/26] Add length parameter to DataSource::Annotation Change DataSource::Annotation from a unit variant to a tuple variant containing the number of rows to generate (usize). This prepares for array expansion support in PLACE annotation layers. Changes: - DataSource::Annotation becomes DataSource::Annotation(usize) - Initialize annotation layers with length 1 in build_place_layer() - Generate multi-row VALUES clause in determine_layer_source() when n > 1 - Update all matches!() checks to use DataSource::Annotation(_) - Update test assertions to use pattern matching The length will be updated in process_annotation_layers() when arrays are present (to be implemented next). Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 14 ++++++++++---- src/execute/mod.rs | 7 +++---- src/parser/builder.rs | 6 +++--- src/parser/mod.rs | 5 ++--- src/plot/main.rs | 2 +- src/plot/types.rs | 9 +++++---- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index f2639203..3f6ec86c 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -223,13 +223,19 @@ pub fn determine_layer_source( Some(DataSource::FilePath(path)) => { format!("'{}'", path) } - Some(DataSource::Annotation) => { - // Annotation layer - generate a single-row dummy table. + Some(DataSource::Annotation(n)) => { + // Annotation layer - generate a dummy table with n rows. // The execution pipeline expects all layers to have a DataFrame, even though // SETTING literals eventually render as Vega-Lite datum values ({"value": ...}) - // that don't reference the data. The 1-row dummy satisfies schema detection, + // that don't reference the data. The n-row dummy satisfies schema detection, // type resolution, and other intermediate steps that expect data to exist. - "(SELECT 1 AS __ggsql_dummy__)".to_string() + if *n == 1 { + "(SELECT 1 AS __ggsql_dummy__)".to_string() + } else { + // Generate VALUES clause with n rows: VALUES (1), (2), ..., (n) + let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); + format!("(VALUES {} AS t(__ggsql_dummy__))", rows.join(", ")) + } } None => { // Layer uses global data - caller must ensure has_global is true diff --git a/src/execute/mod.rs b/src/execute/mod.rs index cad3ffe8..5399eab7 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -154,7 +154,7 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { // Skip annotation layers - they don't inherit global mappings - if matches!(layer.source, Some(DataSource::Annotation)) { + if matches!(layer.source, Some(DataSource::Annotation(_))) { continue; } @@ -2237,9 +2237,8 @@ mod tests { // Second layer: PLACE text annotation let annotation_layer = &result.specs[0].layers[1]; assert_eq!(annotation_layer.geom, crate::Geom::text()); - assert_eq!( - annotation_layer.source, - Some(DataSource::Annotation), + assert!( + matches!(annotation_layer.source, Some(DataSource::Annotation(_))), "PLACE layer should have Annotation source" ); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 5fe74b5c..37baa911 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -506,9 +506,9 @@ fn build_place_layer(node: &Node, source: &SourceTree) -> Result { // Build the layer using standard logic let mut layer = build_layer(node, source)?; - // Mark as annotation layer with dummy data source - // This triggers special handling in transform_aesthetics_to_internal() - layer.source = Some(DataSource::Annotation); + // Mark as annotation layer with initial length of 1 + // The length may be updated in process_annotation_layers() if arrays are present + layer.source = Some(DataSource::Annotation(1)); Ok(layer) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 385ff4b9..0d9983f3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -436,9 +436,8 @@ mod tests { // Second layer: PLACE text with annotation source assert_eq!(specs[0].layers[1].geom, Geom::text()); - assert_eq!( - specs[0].layers[1].source, - Some(DataSource::Annotation), + assert!( + matches!(specs[0].layers[1].source, Some(DataSource::Annotation(_))), "PLACE layer should have Annotation source" ); diff --git a/src/plot/main.rs b/src/plot/main.rs index 9cf9bc65..752fe6c6 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -212,7 +212,7 @@ impl Plot { /// enabling them to participate in scale training and coordinate transformations. pub fn process_annotation_layers(&mut self) { for layer in &mut self.layers { - if !matches!(layer.source, Some(crate::DataSource::Annotation)) { + if !matches!(layer.source, Some(crate::DataSource::Annotation(_))) { continue; } diff --git a/src/plot/types.rs b/src/plot/types.rs index 4414ff7e..8b5e4edb 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,8 +140,9 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), - /// Annotation layer (PLACE clause) - generates dummy single-row source - Annotation, + /// Annotation layer (PLACE clause) with number of annotation rows + /// The usize represents how many rows to generate (from array expansion) + Annotation(usize), } impl DataSource { @@ -150,7 +151,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, - DataSource::Annotation => "__annotation__", + DataSource::Annotation(_) => "__annotation__", } } @@ -161,7 +162,7 @@ impl DataSource { /// Returns true if this is an annotation layer source pub fn is_annotation(&self) -> bool { - matches!(self, DataSource::Annotation) + matches!(self, DataSource::Annotation(_)) } } From a6ab787cc57e49f2acc83f5865b5622d894ee23b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 14:15:11 +0100 Subject: [PATCH 05/26] Add array recycling support for PLACE annotation layers This commit implements array parameter recycling for annotation layers, allowing scalars and length-1 arrays to be automatically recycled to match the length of the longest array parameter. Changes: - Updated DataSource::Annotation to store row count: Annotation(usize) - Added recycle_value_to_length() helper in plot/main.rs - Implemented process_annotation_layers() to handle array recycling - Updated schema inference to handle arrays in literal aesthetic mappings - Modified literal_to_sql() to generate CASE WHEN statements for arrays - Fixed SQL generation for multi-row VALUES clause in annotation layers - Added tests for array recycling and mismatched length validation Recycling Rules: - Scalars recycle to max array length - Length-1 arrays recycle to max array length - All non-1 arrays must have same length (error on mismatch) - Only required/positional aesthetics get recycled when moved to mappings Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 45 +++++++++++++++-- src/execute/mod.rs | 54 +++++++++++++++++++++ src/execute/schema.rs | 85 +++++++++++++++++++++++--------- src/parser/builder.rs | 3 +- src/plot/main.rs | 108 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 260 insertions(+), 35 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 3f6ec86c..43dbf018 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -4,7 +4,7 @@ //! scale requirements and updating type info accordingly. use crate::plot::scale::coerce_dtypes; -use crate::plot::{CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; +use crate::plot::{ArrayElement, CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; use crate::{naming, DataSource}; use polars::prelude::{DataType, TimeUnit}; use std::collections::{HashMap, HashSet}; @@ -34,9 +34,44 @@ pub fn literal_to_sql(lit: &ParameterValue) -> String { "FALSE".to_string() } } - ParameterValue::Array(_) | ParameterValue::Null => { - unreachable!("Grammar prevents arrays and null in literal aesthetic mappings") + ParameterValue::Array(arr) => { + // Generate CASE WHEN statement to select array element by row number + // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... + let mut case_stmt = String::from("CASE __ggsql_dummy__"); + for (i, elem) in arr.iter().enumerate() { + let row_num = i + 1; // Row numbers start at 1 + let value_sql = match elem { + ArrayElement::String(s) => format!("'{}'", s.replace('\'', "''")), + ArrayElement::Number(n) => n.to_string(), + ArrayElement::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + ArrayElement::Date(d) => { + // Convert days since epoch to DATE + format!("DATE '1970-01-01' + INTERVAL {} DAY", d) + } + ArrayElement::DateTime(dt) => { + // Convert microseconds since epoch to TIMESTAMP + format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + } + ArrayElement::Time(t) => { + // Convert nanoseconds since midnight to TIME + let seconds = t / 1_000_000_000; + let nanos = t % 1_000_000_000; + format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + } + ArrayElement::Null => "NULL".to_string(), + }; + case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); + } + case_stmt.push_str(" END"); + case_stmt } + ParameterValue::Null => "NULL".to_string(), } } @@ -232,9 +267,9 @@ pub fn determine_layer_source( if *n == 1 { "(SELECT 1 AS __ggsql_dummy__)".to_string() } else { - // Generate VALUES clause with n rows: VALUES (1), (2), ..., (n) + // Generate VALUES clause with n rows: (VALUES (1), (2), ..., (n)) AS t(col) let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); - format!("(VALUES {} AS t(__ggsql_dummy__))", rows.join(", ")) + format!("(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", rows.join(", ")) } } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 5399eab7..7ec4d3db 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2369,6 +2369,60 @@ mod tests { } } + #[cfg(feature = "duckdb")] + #[test] + fn test_place_array_recycling_scalar() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => [1, 2, 3], y => 10, label => 'Same' + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + let annotation_layer = &result.specs[0].layers[1]; + assert_eq!( + annotation_layer.source, + Some(DataSource::Annotation(3)), + "Should recycle scalar y to length 3" + ); + + let annotation_key = annotation_layer.data_key.as_ref().unwrap(); + let annotation_df = result.data.get(annotation_key).unwrap(); + assert_eq!(annotation_df.height(), 3, "Should have 3 rows"); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_array_mismatched_lengths_error() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => [1, 2, 3], y => [10, 20], label => 'Test' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_err(), "Should error on mismatched array lengths"); + + match result { + Err(GgsqlError::ValidationError(msg)) => { + assert!( + msg.contains("mismatched array lengths"), + "Error should mention mismatched lengths: {}", + msg + ); + } + Err(e) => panic!("Expected ValidationError, got: {}", e), + Ok(_) => panic!("Expected error, got success"), + } + } + #[cfg(feature = "duckdb")] #[test] fn test_place_no_global_mapping_inheritance() { diff --git a/src/execute/schema.rs b/src/execute/schema.rs index b5b7cad8..c59b390a 100644 --- a/src/execute/schema.rs +++ b/src/execute/schema.rs @@ -6,9 +6,9 @@ //! 2. Apply casting to queries //! 3. complete_schema_ranges() - get min/max from cast queries -use crate::plot::{AestheticValue, ColumnInfo, Layer, ParameterValue, Schema}; +use crate::plot::{AestheticValue, ArrayElement, ColumnInfo, Layer, ParameterValue, Schema}; use crate::{naming, DataFrame, Result}; -use polars::prelude::DataType; +use polars::prelude::{DataType, TimeUnit}; /// Simple type info tuple: (name, dtype, is_discrete) pub type TypeInfo = (String, DataType, bool); @@ -246,16 +246,37 @@ pub fn add_literal_columns_to_type_info(layers: &[Layer], layer_type_info: &mut for (layer, type_info) in layers.iter().zip(layer_type_info.iter_mut()) { for (aesthetic, value) in &layer.mappings.aesthetics { if let AestheticValue::Literal(lit) = value { - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } + ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Skip null literals - they don't create columns + continue; + } }; - let is_discrete = - matches!(lit, ParameterValue::String(_) | ParameterValue::Boolean(_)); let col_name = naming::aesthetic_column(aesthetic); // Only add if not already present @@ -310,21 +331,41 @@ pub fn build_aesthetic_schema(layer: &Layer, schema: &Schema) -> Schema { } AestheticValue::Literal(lit) => { // Literals become columns with appropriate type - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } + ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Null: default to Float64 + (DataType::Float64, false) + } }; aesthetic_schema.push(ColumnInfo { name: aes_col_name, dtype, - is_discrete: matches!( - lit, - ParameterValue::String(_) | ParameterValue::Boolean(_) - ), + is_discrete, min: None, max: None, }); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 37baa911..6f82f1c7 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -295,7 +295,8 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // Process annotation layers: move positional/required parameters to mappings // This must happen AFTER transform_aesthetics_to_internal() so parameter keys are in internal space - spec.process_annotation_layers(); + // Returns error if arrays have mismatched lengths + spec.process_annotation_layers()?; Ok(spec) } diff --git a/src/plot/main.rs b/src/plot/main.rs index 752fe6c6..ec3d0ffd 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -47,6 +47,59 @@ pub use super::projection::{Coord, Projection}; // Re-export Facet types from the facet module pub use super::facet::{Facet, FacetLayout}; +// ============================================================================= +// Helper Functions +// ============================================================================= + +/// Recycle a scalar or length-1 array to a target length. +/// +/// Recycling rules: +/// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements +/// - Length-1 arrays → Expand to n copies of the single element +/// - Length-n arrays → Return unchanged if n matches target, error otherwise +fn recycle_value_to_length( + value: ParameterValue, + target_length: usize, +) -> Result { + match value { + // Scalars: convert to array with n copies as ArrayElements + ParameterValue::Number(n) => Ok(ParameterValue::Array( + vec![ArrayElement::Number(n); target_length], + )), + ParameterValue::String(s) => Ok(ParameterValue::Array( + vec![ArrayElement::String(s); target_length], + )), + ParameterValue::Boolean(b) => Ok(ParameterValue::Array( + vec![ArrayElement::Boolean(b); target_length], + )), + ParameterValue::Null => Ok(ParameterValue::Array( + vec![ArrayElement::Null; target_length], + )), + // Arrays: recycle if length 1, return unchanged if matches target, error otherwise + ParameterValue::Array(arr) => { + if arr.len() == 1 { + // Recycle the single element + let element = arr[0].clone(); + Ok(ParameterValue::Array(vec![element; target_length])) + } else if arr.len() == target_length { + // Already correct length + Ok(ParameterValue::Array(arr)) + } else { + // Mismatched length - shouldn't happen if validation passed + Err(crate::GgsqlError::InternalError(format!( + "Attempted to recycle array of length {} to length {} (should have been caught earlier)", + arr.len(), + target_length + ))) + } + } + } +} + +// ============================================================================= +// Plot Type +// ============================================================================= + /// Complete ggsql visualization specification #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Plot { @@ -202,22 +255,50 @@ impl Plot { } /// Process annotation layers by moving positional and required aesthetics - /// from parameters to mappings. + /// from parameters to mappings, with array recycling support. /// /// This must be called AFTER transform_aesthetics_to_internal() so that /// parameter keys are already in internal space (pos1, pos2, etc.). /// + /// Array recycling rules: + /// - Scalars and length-1 arrays are recycled to match the max array length + /// - All non-1 arrays must have the same length (error on mismatch) + /// - Only required/positional aesthetics that are moved to mappings get recycled + /// /// Positional aesthetics need to be in mappings so they go through the same /// transformation pipeline (internal→user space) as data-mapped aesthetics, /// enabling them to participate in scale training and coordinate transformations. - pub fn process_annotation_layers(&mut self) { + pub fn process_annotation_layers(&mut self) -> Result<(), crate::GgsqlError> { for layer in &mut self.layers { - if !matches!(layer.source, Some(crate::DataSource::Annotation(_))) { - continue; + // Get initial length from DataSource + let mut max_length = match &layer.source { + Some(DataSource::Annotation(n)) => *n, + _ => continue, + }; + + // Step 1: Determine max array length from all parameters + let mut array_lengths: std::collections::HashMap = + std::collections::HashMap::new(); + + for (param_name, value) in &layer.parameters { + if let ParameterValue::Array(arr) = value { + let len = arr.len(); + array_lengths.insert(param_name.clone(), len); + if len > 1 && len != max_length { + if max_length > 1 { + // Multiple different non-1 lengths - error + return Err(crate::GgsqlError::ValidationError(format!( + "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", + param_name, len, max_length + ))); + } + max_length = len; + } + } } + // Step 2: Move required/positional aesthetics to mappings with recycling let required_aesthetics = layer.geom.aesthetics().required(); - // Collect parameter keys first to avoid borrow checker issues let param_keys: Vec = layer.parameters.keys().cloned().collect(); for param_name in param_keys { @@ -231,15 +312,28 @@ impl Plot { continue; } - // Move from parameters to mappings (both now use internal names) + // Move from parameters to mappings with recycling if let Some(value) = layer.parameters.remove(¶m_name) { + let recycled_value = if max_length > 1 { + recycle_value_to_length(value, max_length)? + } else { + value + }; + layer .mappings - .insert(¶m_name, crate::AestheticValue::Literal(value)); + .insert(¶m_name, AestheticValue::Literal(recycled_value)); } } } + + // Step 3: Update DataSource with the correct length + if max_length > 1 { + layer.source = Some(DataSource::Annotation(max_length)); + } } + + Ok(()) } /// Check if the spec has any layers From 17d0de2e408b15d227b35f468a6922726ab1a9df Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 16:01:59 +0100 Subject: [PATCH 06/26] Fix PLACE annotation layers with mixed-type array parameters This commit fixes a bug where PLACE text annotation layers with array parameters (e.g., label => [1, 'foo']) displayed incorrectly. Changes: - Add ArrayElement::homogenize() to coerce mixed-type arrays to a common type - Process annotation layers to move array parameters to mappings - Skip non-positional aesthetics for annotation layers in scale training - Remove string-to-number parsing in JSON serialization to preserve types The fix ensures that arrays like [1, 'foo'] are homogenized to ["1", "foo"] and rendered correctly as separate labels in the visualization. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 1 + src/execute/scale.rs | 14 ++++++++++- src/plot/main.rs | 16 +++++++++--- src/plot/types.rs | 50 +++++++++++++++++++++++++++++++++++++ src/writer/vegalite/data.rs | 12 ++------- 5 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 43dbf018..443eb3d7 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -37,6 +37,7 @@ pub fn literal_to_sql(lit: &ParameterValue) -> String { ParameterValue::Array(arr) => { // Generate CASE WHEN statement to select array element by row number // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... + // Arrays are homogeneous (homogenized in recycle_value_to_length), so no type mixing let mut case_stmt = String::from("CASE __ggsql_dummy__"); for (i, elem) in arr.iter().enumerate() { let row_num = i + 1; // Row numbers start at 1 diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 3ad4cf2b..a9e52d7d 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -14,7 +14,7 @@ use crate::plot::{ AestheticValue, ArrayElement, ArrayElementType, ColumnInfo, Layer, ParameterValue, Plot, Scale, ScaleType, ScaleTypeKind, Schema, }; -use crate::{DataFrame, GgsqlError, Result}; +use crate::{DataFrame, DataSource, GgsqlError, Result}; use polars::prelude::Column; use std::collections::{HashMap, HashSet}; @@ -995,6 +995,8 @@ pub fn find_columns_for_aesthetic<'a>( data_map: &'a HashMap, aesthetic_ctx: &AestheticContext, ) -> Vec<&'a Column> { + use crate::plot::aesthetic::is_positional_aesthetic; + let mut column_refs = Vec::new(); let aesthetics_to_check = aesthetic_ctx .internal_positional_family(aesthetic) @@ -1003,6 +1005,16 @@ pub fn find_columns_for_aesthetic<'a>( // Check each layer's mapping - every layer has its own data for (i, layer) in layers.iter().enumerate() { + // For annotation layers, only train scales for positional aesthetics (pos1, pos2) + // Other aesthetics (label, color, size, etc.) should use literal values + let is_annotation = matches!(layer.source, Some(DataSource::Annotation(_))); + let is_positional_family = is_positional_aesthetic(aesthetic); + + if is_annotation && !is_positional_family { + // Skip non-positional aesthetics for annotation layers + continue; + } + if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { diff --git a/src/plot/main.rs b/src/plot/main.rs index ec3d0ffd..00c95bd7 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -75,8 +75,12 @@ fn recycle_value_to_length( ParameterValue::Null => Ok(ParameterValue::Array( vec![ArrayElement::Null; target_length], )), - // Arrays: recycle if length 1, return unchanged if matches target, error otherwise + // Arrays: homogenize types if mixed, then recycle if needed ParameterValue::Array(arr) => { + // Homogenize array if it has mixed incompatible types + let arr = ArrayElement::homogenize(&arr); + + // Now handle recycling if arr.len() == 1 { // Recycle the single element let element = arr[0].clone(); @@ -297,16 +301,20 @@ impl Plot { } } - // Step 2: Move required/positional aesthetics to mappings with recycling + // Step 2: Move required/positional/array aesthetics to mappings with recycling let required_aesthetics = layer.geom.aesthetics().required(); let param_keys: Vec = layer.parameters.keys().cloned().collect(); for param_name in param_keys { - // Check if this is a positional aesthetic OR a required aesthetic + // Check if this is a positional aesthetic OR a required aesthetic OR an array let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); let is_required = required_aesthetics.contains(¶m_name.as_str()); + let is_array = matches!( + layer.parameters.get(¶m_name), + Some(ParameterValue::Array(_)) + ); - if is_positional || is_required { + if is_positional || is_required || is_array { // Skip if already in mappings if layer.mappings.contains_key(¶m_name) { continue; diff --git a/src/plot/types.rs b/src/plot/types.rs index 8b5e4edb..a9db97db 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -618,6 +618,34 @@ impl ArrayElement { } } + /// Homogenize a slice of array elements to a common type. + /// + /// Infers the target type from all elements, then attempts to coerce all elements + /// to that type. If coercion fails (e.g., string + number), falls back to String type. + /// + /// Returns a new vector with homogenized elements. + pub fn homogenize(values: &[Self]) -> Vec { + // Infer target type from all elements + let Some(target_type) = Self::infer_type(values) else { + // All nulls or empty array - return cloned as-is + return values.to_vec(); + }; + + // Try to coerce all elements to the inferred type + let coerced: Result, _> = values.iter().map(|elem| elem.coerce_to(target_type)).collect(); + + match coerced { + Ok(coerced_arr) => coerced_arr, + Err(_) => { + // Coercion failed - fall back to String type + values + .iter() + .map(|elem| elem.coerce_to(ArrayElementType::String).unwrap_or(Self::Null)) + .collect() + } + } + } + /// Get the type name for error messages. fn type_name(&self) -> &'static str { match self { @@ -1375,4 +1403,26 @@ mod tests { let result = elem.coerce_to(ArrayElementType::Date); assert!(result.is_err()); } + + #[test] + fn test_homogenize_mixed_number_string() { + let arr = vec![ + ArrayElement::Number(1.0), + ArrayElement::String("foo".to_string()), + ]; + + let homogenized = ArrayElement::homogenize(&arr); + + // Should fall back to String type since "foo" can't be coerced to Number + assert_eq!(homogenized.len(), 2); + assert!(matches!(homogenized[0], ArrayElement::String(_))); + assert!(matches!(homogenized[1], ArrayElement::String(_))); + + if let ArrayElement::String(s) = &homogenized[0] { + assert_eq!(s, "1"); + } + if let ArrayElement::String(s) = &homogenized[1] { + assert_eq!(s, "foo"); + } + } } diff --git a/src/writer/vegalite/data.rs b/src/writer/vegalite/data.rs index 4b1c89e3..a19b7e36 100644 --- a/src/writer/vegalite/data.rs +++ b/src/writer/vegalite/data.rs @@ -97,16 +97,8 @@ pub(super) fn series_value_at(series: &Series, idx: usize) -> Result { let ca = series .str() .map_err(|e| GgsqlError::WriterError(format!("Failed to cast to string: {}", e)))?; - // Try to parse as number if it looks numeric - if let Some(val) = ca.get(idx) { - if let Ok(num) = val.parse::() { - Ok(json!(num)) - } else { - Ok(json!(val)) - } - } else { - Ok(Value::Null) - } + // Keep strings as strings (don't parse to numbers) + Ok(ca.get(idx).map(|v| json!(v)).unwrap_or(Value::Null)) } Date => { // Convert days since epoch to ISO date string: "YYYY-MM-DD" From 16abba9e391b6c8b4a153ddf7b3cde78b3193230 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 16:36:47 +0100 Subject: [PATCH 07/26] Refactor SQL generation and array recycling to methods on types Extract standalone functions into methods on their respective types for better encapsulation and API design: - Add ArrayElement::to_sql() for SQL literal conversion - Add ParameterValue::to_sql() for SQL literal conversion (including CASE statements for arrays) - Add ParameterValue::rep(n) for array recycling (replaces recycle_value_to_length) - Add ParameterValue::to_array_element() helper for scalar conversion Key improvements: - Simplified rep() to only homogenize arrays when arr.len() == n - Unified scalar handling through to_array_element() catch-all - Removed literal_to_sql() standalone function - Removed recycle_value_to_length() standalone function Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 56 +-------------------- src/execute/layer.rs | 4 +- src/plot/main.rs | 45 +---------------- src/plot/types.rs | 109 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 101 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 443eb3d7..513b171f 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -4,7 +4,7 @@ //! scale requirements and updating type info accordingly. use crate::plot::scale::coerce_dtypes; -use crate::plot::{ArrayElement, CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; +use crate::plot::{CastTargetType, Layer, Plot, SqlTypeNames}; use crate::{naming, DataSource}; use polars::prelude::{DataType, TimeUnit}; use std::collections::{HashMap, HashSet}; @@ -22,60 +22,6 @@ pub struct TypeRequirement { pub sql_type_name: String, } -/// Format a literal value as SQL -pub fn literal_to_sql(lit: &ParameterValue) -> String { - match lit { - ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), - ParameterValue::Number(n) => n.to_string(), - ParameterValue::Boolean(b) => { - if *b { - "TRUE".to_string() - } else { - "FALSE".to_string() - } - } - ParameterValue::Array(arr) => { - // Generate CASE WHEN statement to select array element by row number - // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... - // Arrays are homogeneous (homogenized in recycle_value_to_length), so no type mixing - let mut case_stmt = String::from("CASE __ggsql_dummy__"); - for (i, elem) in arr.iter().enumerate() { - let row_num = i + 1; // Row numbers start at 1 - let value_sql = match elem { - ArrayElement::String(s) => format!("'{}'", s.replace('\'', "''")), - ArrayElement::Number(n) => n.to_string(), - ArrayElement::Boolean(b) => { - if *b { - "TRUE".to_string() - } else { - "FALSE".to_string() - } - } - ArrayElement::Date(d) => { - // Convert days since epoch to DATE - format!("DATE '1970-01-01' + INTERVAL {} DAY", d) - } - ArrayElement::DateTime(dt) => { - // Convert microseconds since epoch to TIMESTAMP - format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) - } - ArrayElement::Time(t) => { - // Convert nanoseconds since midnight to TIME - let seconds = t / 1_000_000_000; - let nanos = t % 1_000_000_000; - format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) - } - ArrayElement::Null => "NULL".to_string(), - }; - case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); - } - case_stmt.push_str(" END"); - case_stmt - } - ParameterValue::Null => "NULL".to_string(), - } -} - /// Determine which columns need casting based on scale requirements. /// /// For each layer, collects columns that need casting to match the scale's diff --git a/src/execute/layer.rs b/src/execute/layer.rs index d4d911fb..42684b90 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -11,7 +11,7 @@ use crate::{naming, DataFrame, GgsqlError, Result}; use polars::prelude::DataType; use std::collections::{HashMap, HashSet}; -use super::casting::{literal_to_sql, TypeRequirement}; +use super::casting::TypeRequirement; use super::schema::build_aesthetic_schema; /// Build the source query for a layer. @@ -90,7 +90,7 @@ pub fn build_layer_select_list( } AestheticValue::Literal(lit) => { // Literals become columns with prefixed aesthetic name - format!("{} AS \"{}\"", literal_to_sql(lit), aes_col_name) + format!("{} AS \"{}\"", lit.to_sql(), aes_col_name) } }; diff --git a/src/plot/main.rs b/src/plot/main.rs index 00c95bd7..1b423095 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -57,49 +57,6 @@ pub use super::facet::{Facet, FacetLayout}; /// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements /// - Length-1 arrays → Expand to n copies of the single element /// - Length-n arrays → Return unchanged if n matches target, error otherwise -fn recycle_value_to_length( - value: ParameterValue, - target_length: usize, -) -> Result { - match value { - // Scalars: convert to array with n copies as ArrayElements - ParameterValue::Number(n) => Ok(ParameterValue::Array( - vec![ArrayElement::Number(n); target_length], - )), - ParameterValue::String(s) => Ok(ParameterValue::Array( - vec![ArrayElement::String(s); target_length], - )), - ParameterValue::Boolean(b) => Ok(ParameterValue::Array( - vec![ArrayElement::Boolean(b); target_length], - )), - ParameterValue::Null => Ok(ParameterValue::Array( - vec![ArrayElement::Null; target_length], - )), - // Arrays: homogenize types if mixed, then recycle if needed - ParameterValue::Array(arr) => { - // Homogenize array if it has mixed incompatible types - let arr = ArrayElement::homogenize(&arr); - - // Now handle recycling - if arr.len() == 1 { - // Recycle the single element - let element = arr[0].clone(); - Ok(ParameterValue::Array(vec![element; target_length])) - } else if arr.len() == target_length { - // Already correct length - Ok(ParameterValue::Array(arr)) - } else { - // Mismatched length - shouldn't happen if validation passed - Err(crate::GgsqlError::InternalError(format!( - "Attempted to recycle array of length {} to length {} (should have been caught earlier)", - arr.len(), - target_length - ))) - } - } - } -} - // ============================================================================= // Plot Type // ============================================================================= @@ -323,7 +280,7 @@ impl Plot { // Move from parameters to mappings with recycling if let Some(value) = layer.parameters.remove(¶m_name) { let recycled_value = if max_length > 1 { - recycle_value_to_length(value, max_length)? + value.rep(max_length)? } else { value }; diff --git a/src/plot/types.rs b/src/plot/types.rs index a9db97db..8c663d8e 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -646,6 +646,38 @@ impl ArrayElement { } } + /// Convert this element to a SQL literal string. + /// + /// Used for generating SQL expressions from literal values. + pub fn to_sql(&self) -> String { + match self { + Self::String(s) => format!("'{}'", s.replace('\'', "''")), + Self::Number(n) => n.to_string(), + Self::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + Self::Date(d) => { + // Convert days since epoch to DATE + format!("DATE '1970-01-01' + INTERVAL {} DAY", d) + } + Self::DateTime(dt) => { + // Convert microseconds since epoch to TIMESTAMP + format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + } + Self::Time(t) => { + // Convert nanoseconds since midnight to TIME + let seconds = t / 1_000_000_000; + let nanos = t % 1_000_000_000; + format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + } + Self::Null => "NULL".to_string(), + } + } + /// Get the type name for error messages. fn type_name(&self) -> &'static str { match self { @@ -825,6 +857,83 @@ impl ParameterValue { _ => None, } } + + /// Convert this parameter value to a SQL literal string. + /// + /// For arrays, generates a CASE WHEN statement that selects array elements by row number + /// using the __ggsql_dummy__ column. + pub fn to_sql(&self) -> String { + match self { + ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), + ParameterValue::Number(n) => n.to_string(), + ParameterValue::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + ParameterValue::Array(arr) => { + let mut case_stmt = String::from("CASE __ggsql_dummy__"); + for (i, elem) in arr.iter().enumerate() { + let row_num = i + 1; + let value_sql = elem.to_sql(); + case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); + } + case_stmt.push_str(" END"); + case_stmt + } + ParameterValue::Null => "NULL".to_string(), + } + } + + /// Convert a scalar ParameterValue to an ArrayElement. + /// + /// Panics if called on an Array variant. + fn to_array_element(self) -> ArrayElement { + match self { + ParameterValue::Number(num) => ArrayElement::Number(num), + ParameterValue::String(s) => ArrayElement::String(s), + ParameterValue::Boolean(b) => ArrayElement::Boolean(b), + ParameterValue::Null => ArrayElement::Null, + ParameterValue::Array(_) => panic!("Cannot convert Array to single ArrayElement"), + } + } + + /// Recycle this value to a target array length. + /// + /// - Scalars (String, Number, Boolean, Null) are converted to arrays with n copies + /// - Arrays of length 1 are recycled to n copies of that element + /// - Arrays of target length are returned as-is (after homogenization) + /// - Arrays of other lengths produce an error + pub fn rep(self, n: usize) -> Result { + match self { + // Arrays: homogenize types if mixed, then recycle if needed + ParameterValue::Array(arr) => { + if arr.len() == 1 { + // Recycle the single element + let element = arr[0].clone(); + Ok(ParameterValue::Array(vec![element; n])) + } else if arr.len() == n { + // Already correct length - homogenize for type consistency + let arr = ArrayElement::homogenize(&arr); + Ok(ParameterValue::Array(arr)) + } else { + // Mismatched length - shouldn't happen if validation passed + Err(crate::GgsqlError::InternalError(format!( + "Attempted to recycle array of length {} to length {} (should have been caught earlier)", + arr.len(), + n + ))) + } + } + // Scalars: convert to ArrayElement and replicate n times + scalar => { + let elem = scalar.to_array_element(); + Ok(ParameterValue::Array(vec![elem; n])) + } + } + } } // ============================================================================= From 6b41815bf72a3d77a75ccf9e5fc887092e8c71f2 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 17:32:29 +0100 Subject: [PATCH 08/26] Add is_scaled field to prevent annotation literals from using scales Fixes issue where PLACE layer literals (e.g., stroke => ['red', 'blue']) would incorrectly use scales from other layers. Changes: - Add is_scaled field to AestheticValue::Column - Add identity_column() helper for creating unscaled columns - Mark annotation layer non-positional literals with is_scaled: false - Use identity_scale when is_scaled is false in encoding Example: PLACE text with stroke => ['red', 'blue'] now renders with literal colors instead of being transformed by scales from other layers. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 1 + src/execute/mod.rs | 1 + src/execute/scale.rs | 24 ++++++++---------------- src/plot/layer/mod.rs | 17 ++++++++++++++--- src/plot/types.rs | 17 +++++++++++++++++ src/writer/vegalite/encoding.rs | 6 ++++-- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 42684b90..fcbe46c1 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -498,6 +498,7 @@ where name: prefixed_name, original_name, is_dummy, + is_scaled: true, }; layer.mappings.insert(aesthetic.clone(), value); } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 7ec4d3db..78520661 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -308,6 +308,7 @@ fn add_facet_mappings_to_layers( name: var.to_string(), original_name: Some(var.to_string()), is_dummy: false, + is_scaled: true, }, ); } diff --git a/src/execute/scale.rs b/src/execute/scale.rs index a9e52d7d..28094526 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -14,7 +14,7 @@ use crate::plot::{ AestheticValue, ArrayElement, ArrayElementType, ColumnInfo, Layer, ParameterValue, Plot, Scale, ScaleType, ScaleTypeKind, Schema, }; -use crate::{DataFrame, DataSource, GgsqlError, Result}; +use crate::{DataFrame, GgsqlError, Result}; use polars::prelude::Column; use std::collections::{HashMap, HashSet}; @@ -995,8 +995,6 @@ pub fn find_columns_for_aesthetic<'a>( data_map: &'a HashMap, aesthetic_ctx: &AestheticContext, ) -> Vec<&'a Column> { - use crate::plot::aesthetic::is_positional_aesthetic; - let mut column_refs = Vec::new(); let aesthetics_to_check = aesthetic_ctx .internal_positional_family(aesthetic) @@ -1005,21 +1003,15 @@ pub fn find_columns_for_aesthetic<'a>( // Check each layer's mapping - every layer has its own data for (i, layer) in layers.iter().enumerate() { - // For annotation layers, only train scales for positional aesthetics (pos1, pos2) - // Other aesthetics (label, color, size, etc.) should use literal values - let is_annotation = matches!(layer.source, Some(DataSource::Annotation(_))); - let is_positional_family = is_positional_aesthetic(aesthetic); - - if is_annotation && !is_positional_family { - // Skip non-positional aesthetics for annotation layers - continue; - } - if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { - if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { - if let Ok(column) = df.column(name) { - column_refs.push(column); + if let Some(AestheticValue::Column { name, is_scaled, .. }) = layer.mappings.get(aes_name) { + // Only train scales for columns that should be scaled + // Annotation layer literals have is_scaled: false + if *is_scaled { + if let Ok(column) = df.column(name) { + column_refs.push(column); + } } } } diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index 84076071..5c7e80ab 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -239,6 +239,9 @@ impl Layer { /// happens in Polars after query execution, before the data goes to the writer. pub fn update_mappings_for_aesthetic_columns(&mut self) { use crate::naming; + use crate::plot::aesthetic::is_positional_aesthetic; + + let is_annotation = matches!(self.source, Some(crate::DataSource::Annotation(_))); for (aesthetic, value) in self.mappings.aesthetics.iter_mut() { let aes_col_name = naming::aesthetic_column(aesthetic); @@ -256,9 +259,14 @@ impl Layer { *name = aes_col_name; } AestheticValue::Literal(_) => { - // Literals are also columns with prefixed aesthetic name - // Note: literals don't have an original_name to preserve - *value = AestheticValue::standard_column(aes_col_name); + // Literals become columns with prefixed aesthetic name + // For annotation layers, non-positional aesthetics should use identity scales + let is_positional = is_positional_aesthetic(aesthetic); + *value = if is_annotation && !is_positional { + AestheticValue::identity_column(aes_col_name) + } else { + AestheticValue::standard_column(aes_col_name) + }; } } } @@ -285,6 +293,7 @@ impl Layer { AestheticValue::Column { original_name, is_dummy, + is_scaled, .. } => { // Use the stat name from remappings as the original_name for labels @@ -293,6 +302,7 @@ impl Layer { name: prefixed_name, original_name: original_name.clone(), is_dummy: *is_dummy, + is_scaled: *is_scaled, } } AestheticValue::Literal(_) => { @@ -302,6 +312,7 @@ impl Layer { name: prefixed_name, original_name: None, is_dummy: false, + is_scaled: true, } } }; diff --git a/src/plot/types.rs b/src/plot/types.rs index 8c663d8e..e1821779 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -182,6 +182,10 @@ pub enum AestheticValue { original_name: Option, /// Whether this is a dummy/placeholder column (e.g., for bar charts without x mapped) is_dummy: bool, + /// Whether scales should be applied to this column + /// Set to false for annotation layer literals (e.g., stroke => ['red', 'blue']) + /// that are converted to columns but should use identity scales + is_scaled: bool, }, /// Literal value (quoted string, number, or boolean) Literal(ParameterValue), @@ -194,6 +198,17 @@ impl AestheticValue { name: name.into(), original_name: None, is_dummy: false, + is_scaled: true, + } + } + + /// Create a column mapping with identity scale (no scale transformation) + pub fn identity_column(name: impl Into) -> Self { + Self::Column { + name: name.into(), + original_name: None, + is_dummy: false, + is_scaled: false, } } @@ -203,6 +218,7 @@ impl AestheticValue { name: name.into(), original_name: None, is_dummy: true, + is_scaled: true, } } @@ -215,6 +231,7 @@ impl AestheticValue { name: name.into(), original_name: Some(original_name.into()), is_dummy: false, + is_scaled: true, } } diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index f91ace3a..b5411a90 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -802,7 +802,8 @@ pub(super) fn build_encoding_channel( name: col, original_name, is_dummy, - } => build_column_encoding(aesthetic, col, original_name, *is_dummy, ctx), + is_scaled, + } => build_column_encoding(aesthetic, col, original_name, *is_dummy, *is_scaled, ctx), AestheticValue::Literal(lit) => build_literal_encoding(aesthetic, lit), } } @@ -813,13 +814,14 @@ fn build_column_encoding( col: &str, original_name: &Option, is_dummy: bool, + is_scaled: bool, ctx: &mut EncodingContext, ) -> Result { let aesthetic_ctx = ctx.spec.get_aesthetic_context(); let primary = aesthetic_ctx .primary_internal_positional(aesthetic) .unwrap_or(aesthetic); - let mut identity_scale = false; + let mut identity_scale = !is_scaled; // Determine field type from scale or infer from data let field_type = determine_field_type_for_aesthetic( From 8bc43e8f4550b84b8b27f833644733635e73e655 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 11:00:35 +0100 Subject: [PATCH 09/26] error when writing array value encoding --- src/execute/casting.rs | 5 ++++- src/execute/mod.rs | 17 ++++++++++++----- src/execute/scale.rs | 5 ++++- src/lib.rs | 3 ++- src/parser/mod.rs | 5 ++++- src/plot/main.rs | 10 ---------- src/plot/types.rs | 28 ++++++++++++++++++++-------- src/writer/vegalite/encoding.rs | 6 ++++++ 8 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 513b171f..403ff606 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -216,7 +216,10 @@ pub fn determine_layer_source( } else { // Generate VALUES clause with n rows: (VALUES (1), (2), ..., (n)) AS t(col) let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); - format!("(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", rows.join(", ")) + format!( + "(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", + rows.join(", ") + ) } } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 78520661..6de7bda1 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2228,12 +2228,19 @@ mod tests { let result = prepare_data_with_reader(query, &reader).unwrap(); assert_eq!(result.specs.len(), 1); - assert_eq!(result.specs[0].layers.len(), 2, "Should have DRAW + PLACE layers"); + assert_eq!( + result.specs[0].layers.len(), + 2, + "Should have DRAW + PLACE layers" + ); // First layer: regular DRAW point let point_layer = &result.specs[0].layers[0]; assert_eq!(point_layer.geom, crate::Geom::point()); - assert!(point_layer.source.is_none(), "DRAW layer should have no explicit source"); + assert!( + point_layer.source.is_none(), + "DRAW layer should have no explicit source" + ); // Second layer: PLACE text annotation let annotation_layer = &result.specs[0].layers[1]; @@ -2441,9 +2448,9 @@ mod tests { // DRAW layer should have inherited global color mapping let point_layer = &result.specs[0].layers[0]; assert!( - point_layer.mappings.contains_key("color") || - point_layer.mappings.contains_key("fill") || - point_layer.mappings.contains_key("stroke"), + point_layer.mappings.contains_key("color") + || point_layer.mappings.contains_key("fill") + || point_layer.mappings.contains_key("stroke"), "DRAW layer should inherit color from global mappings" ); diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 28094526..b8127e90 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -1005,7 +1005,10 @@ pub fn find_columns_for_aesthetic<'a>( for (i, layer) in layers.iter().enumerate() { if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { - if let Some(AestheticValue::Column { name, is_scaled, .. }) = layer.mappings.get(aes_name) { + if let Some(AestheticValue::Column { + name, is_scaled, .. + }) = layer.mappings.get(aes_name) + { // Only train scales for columns that should be scaled // Annotation layer literals have is_scaled: false if *is_scaled { diff --git a/src/lib.rs b/src/lib.rs index 0363aa59..f968a673 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -816,7 +816,8 @@ mod integration_tests { // Note: stroke color goes through resolve_aesthetics and may be in different location // Just verify it's present somewhere as a literal let text_mark = &text_layer["mark"]; - let has_stroke_value = encoding.get("stroke") + let has_stroke_value = encoding + .get("stroke") .and_then(|s| s.get("value")) .is_some() || text_mark.get("stroke").is_some(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0d9983f3..2979a51e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -432,7 +432,10 @@ mod tests { // First layer: regular DRAW point assert_eq!(specs[0].layers[0].geom, Geom::point()); - assert!(specs[0].layers[0].source.is_none(), "DRAW layer should have no explicit source"); + assert!( + specs[0].layers[0].source.is_none(), + "DRAW layer should have no explicit source" + ); // Second layer: PLACE text with annotation source assert_eq!(specs[0].layers[1].geom, Geom::text()); diff --git a/src/plot/main.rs b/src/plot/main.rs index 1b423095..390b37e2 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -47,16 +47,6 @@ pub use super::projection::{Coord, Projection}; // Re-export Facet types from the facet module pub use super::facet::{Facet, FacetLayout}; -// ============================================================================= -// Helper Functions -// ============================================================================= - -/// Recycle a scalar or length-1 array to a target length. -/// -/// Recycling rules: -/// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements -/// - Length-1 arrays → Expand to n copies of the single element -/// - Length-n arrays → Return unchanged if n matches target, error otherwise // ============================================================================= // Plot Type // ============================================================================= diff --git a/src/plot/types.rs b/src/plot/types.rs index e1821779..d5955b68 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -649,7 +649,10 @@ impl ArrayElement { }; // Try to coerce all elements to the inferred type - let coerced: Result, _> = values.iter().map(|elem| elem.coerce_to(target_type)).collect(); + let coerced: Result, _> = values + .iter() + .map(|elem| elem.coerce_to(target_type)) + .collect(); match coerced { Ok(coerced_arr) => coerced_arr, @@ -657,7 +660,10 @@ impl ArrayElement { // Coercion failed - fall back to String type values .iter() - .map(|elem| elem.coerce_to(ArrayElementType::String).unwrap_or(Self::Null)) + .map(|elem| { + elem.coerce_to(ArrayElementType::String) + .unwrap_or(Self::Null) + }) .collect() } } @@ -683,13 +689,19 @@ impl ArrayElement { } Self::DateTime(dt) => { // Convert microseconds since epoch to TIMESTAMP - format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + format!( + "TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", + dt + ) } Self::Time(t) => { // Convert nanoseconds since midnight to TIME let seconds = t / 1_000_000_000; let nanos = t % 1_000_000_000; - format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + format!( + "TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", + seconds, nanos + ) } Self::Null => "NULL".to_string(), } @@ -907,11 +919,11 @@ impl ParameterValue { /// Convert a scalar ParameterValue to an ArrayElement. /// /// Panics if called on an Array variant. - fn to_array_element(self) -> ArrayElement { + fn to_array_element(&self) -> ArrayElement { match self { - ParameterValue::Number(num) => ArrayElement::Number(num), - ParameterValue::String(s) => ArrayElement::String(s), - ParameterValue::Boolean(b) => ArrayElement::Boolean(b), + ParameterValue::Number(num) => ArrayElement::Number(*num), + ParameterValue::String(s) => ArrayElement::String(s.clone()), + ParameterValue::Boolean(b) => ArrayElement::Boolean(*b), ParameterValue::Null => ArrayElement::Null, ParameterValue::Array(_) => panic!("Cannot convert Array to single ArrayElement"), } diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index b5411a90..d6f53d2f 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -939,6 +939,12 @@ fn build_literal_encoding(aesthetic: &str, lit: &ParameterValue) -> Result json!(n), } } + ParameterValue::Array(_) => { + return Err(crate::GgsqlError::WriterError(format!( + "The `{aes}` SETTING must be scalar, not an array.", + aes = aesthetic + ))) + } _ => lit.to_json(), }; Ok(json!({"value": val})) From 3c63669c614acf82dfc5c3e3dc01e69cce6a62ea Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 11:35:38 +0100 Subject: [PATCH 10/26] add docs --- doc/_quarto.yml | 4 ++++ doc/syntax/clause/place.qmd | 37 +++++++++++++++++++++++++++++++++++++ doc/syntax/index.qmd | 1 + 3 files changed, 42 insertions(+) create mode 100644 doc/syntax/clause/place.qmd diff --git a/doc/_quarto.yml b/doc/_quarto.yml index a5801133..f5684647 100644 --- a/doc/_quarto.yml +++ b/doc/_quarto.yml @@ -39,6 +39,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" @@ -69,6 +71,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" diff --git a/doc/syntax/clause/place.qmd b/doc/syntax/clause/place.qmd new file mode 100644 index 00000000..6321bbed --- /dev/null +++ b/doc/syntax/clause/place.qmd @@ -0,0 +1,37 @@ +--- +title: "Create annotation layers with `PLACE`" +--- + +The `PLACE` clause is the little sibling of the [`DRAW` clause](../clause/draw.qmd) that also creates layers. +A layer created with `PLACE` has no mappings to data, all aesthetics are set using literal values instead. + +## Clause syntax +The `PLACE` clause takes just a type and a setting clause, all of them required. + +```ggsql +PLACE + SETTING => , ... +``` + +Like `DRAW`, the layer type is required and specifies the type of layer to draw, like `point` or `text`. +It defines how the remaining settings are interpreted. +The [main syntax page](../index.qmd#layers) has a list of all available layer types + +Unlike the `DRAW` clause, the `PLACE` clause does not support `FILTER`, `PARTITION BY`, and `ORDER BY` clauses since +everything is declared inline. + +### `SETTING` +```ggsql +SETTING => , ... +``` + +The `SETTING` clause can be used for to different things: + +* *Setting aesthetics*: All aesthetics in `PLACE` layers are specified using literal value, e.g. 'red' (as in the color red). +Aesthetics that are set will not go through a scale but will use the provided value as-is. +You cannot set an aesthetic to a column, only to a literal values. +Contrary to `DRAW` layers, `PLACE` layers can take multiple literal values in an array. +* *Setting parameters*: Some layers take additional arguments that control how they behave. +Often, but not always, these modify the statistical transformation in some way. +An example would be the binwidth parameter in histogram which controls the width of each bin during histogram calculation. +This is not a statistical property since it is not related to each record, but to the calculation as a whole. diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index 1400280f..c58fa89e 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -7,6 +7,7 @@ ggsql augments the standard SQL syntax with a number of new clauses to describe - [`VISUALISE`](clause/visualise.qmd) initiates the visualisation part of the query - [`DRAW`](clause/draw.qmd) adds a new layer to the visualisation + - [`PLACE`](clause/place.qmd) adds an annotation layer - [`SCALE`](clause/scale.qmd) specify how an aesthetic should be scaled - [`FACET`](clause/facet.qmd) describes how data should be split into small multiples - [`PROJECT`](clause/project.qmd) is used for selecting the coordinate system to use From c8d571a6e19e36765cc39e4a51fde3559b7163d7 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 14:06:57 +0100 Subject: [PATCH 11/26] Refactor annotation layer SQL generation for clarity Simplifies annotation layer handling by consolidating SQL generation logic: - Unified layer_source_query() to handle all source types including annotations - Moved determine_layer_source() from casting.rs to layer.rs and merged into layer_source_query() - Removed CASE statement approach in favor of direct VALUES clause generation - build_annotation_values_clause() now generates single SELECT...FROM VALUES statement - Updated ParameterValue::to_sql() to panic on arrays (arrays handled separately) - Cleaned up unused imports in casting.rs This eliminates redundant function calls and awkward unreachable!() cases, making the annotation SQL generation more straightforward. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 52 +--------------- src/execute/layer.rs | 133 +++++++++++++++++++++++++++++++++++++---- src/plot/types.rs | 15 ++--- 3 files changed, 130 insertions(+), 70 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 403ff606..fd56eb7a 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -4,10 +4,10 @@ //! scale requirements and updating type info accordingly. use crate::plot::scale::coerce_dtypes; -use crate::plot::{CastTargetType, Layer, Plot, SqlTypeNames}; -use crate::{naming, DataSource}; +use crate::plot::{CastTargetType, Plot, SqlTypeNames}; +use crate::naming; use polars::prelude::{DataType, TimeUnit}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use super::schema::TypeInfo; @@ -183,49 +183,3 @@ pub fn update_type_info_for_casting(type_info: &mut [TypeInfo], requirements: &[ } } } - -/// Determine the data source table name for a layer. -/// -/// Returns the table/CTE name to query from: -/// - Layer with explicit source (CTE, table, file) → that source name -/// - Layer using global data → global table name -pub fn determine_layer_source( - layer: &Layer, - materialized_ctes: &HashSet, - has_global: bool, -) -> String { - match &layer.source { - Some(DataSource::Identifier(name)) => { - if materialized_ctes.contains(name) { - naming::cte_table(name) - } else { - name.clone() - } - } - Some(DataSource::FilePath(path)) => { - format!("'{}'", path) - } - Some(DataSource::Annotation(n)) => { - // Annotation layer - generate a dummy table with n rows. - // The execution pipeline expects all layers to have a DataFrame, even though - // SETTING literals eventually render as Vega-Lite datum values ({"value": ...}) - // that don't reference the data. The n-row dummy satisfies schema detection, - // type resolution, and other intermediate steps that expect data to exist. - if *n == 1 { - "(SELECT 1 AS __ggsql_dummy__)".to_string() - } else { - // Generate VALUES clause with n rows: (VALUES (1), (2), ..., (n)) AS t(col) - let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); - format!( - "(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", - rows.join(", ") - ) - } - } - None => { - // Layer uses global data - caller must ensure has_global is true - debug_assert!(has_global, "Layer has no source and no global data"); - naming::global_table() - } - } -} diff --git a/src/execute/layer.rs b/src/execute/layer.rs index fcbe46c1..e60fe0a3 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -16,19 +16,40 @@ use super::schema::build_aesthetic_schema; /// Build the source query for a layer. /// -/// Returns `SELECT * FROM source` where source is either: -/// - The layer's explicit source (table, CTE, file) -/// - The global table if layer has no explicit source -/// -/// Note: This is distinct from `build_layer_base_query()` which builds a full -/// SELECT with aesthetic column renames and type casts. +/// Returns a complete query that can be executed to retrieve the layer's data: +/// - Annotation layers → VALUES clause with all aesthetic columns +/// - Table/CTE layers → `SELECT * FROM table_or_cte` +/// - File path layers → `SELECT * FROM 'path'` +/// - Layers without explicit source → `SELECT * FROM __ggsql_global__` pub fn layer_source_query( layer: &Layer, materialized_ctes: &HashSet, has_global: bool, ) -> String { - let source = super::casting::determine_layer_source(layer, materialized_ctes, has_global); - format!("SELECT * FROM {}", source) + match &layer.source { + Some(crate::DataSource::Annotation(n)) => { + // Annotation layers: return complete VALUES clause + build_annotation_values_clause(layer, *n) + } + Some(crate::DataSource::Identifier(name)) => { + // Regular table or CTE + let source = if materialized_ctes.contains(name) { + naming::cte_table(name) + } else { + name.clone() + }; + format!("SELECT * FROM {}", source) + } + Some(crate::DataSource::FilePath(path)) => { + // File path source + format!("SELECT * FROM '{}'", path) + } + None => { + // Layer uses global data + debug_assert!(has_global, "Layer has no source and no global data"); + format!("SELECT * FROM {}", naming::global_table()) + } + } } /// Build the SELECT list for a layer query with aesthetic-renamed columns and casting. @@ -273,6 +294,9 @@ pub fn apply_pre_stat_transform( /// 2. Renames columns to aesthetic names (e.g., "Date" AS "__ggsql_aes_x__") /// 3. Applies type casts based on scale requirements /// +/// For annotation layers, the source_query is already the complete VALUES clause, +/// so it's returned as-is (no wrapping, filtering, or casting needed). +/// /// The resulting query can be used for: /// - Schema completion (fetching min/max values) /// - Scale input range resolution @@ -282,8 +306,8 @@ pub fn apply_pre_stat_transform( /// # Arguments /// /// * `layer` - The layer configuration with aesthetic mappings -/// * `source_query` - The base query for the layer's data source -/// * `type_requirements` - Columns that need type casting +/// * `source_query` - The base query for the layer's data source (for annotations, this is already the VALUES clause) +/// * `type_requirements` - Columns that need type casting (not applicable to annotations) /// /// # Returns /// @@ -293,6 +317,12 @@ pub fn build_layer_base_query( source_query: &str, type_requirements: &[TypeRequirement], ) -> String { + // For annotation layers, source_query is already the VALUES clause from layer_source_query() + // Just return it as-is - no wrapping, filtering, or casting needed + if matches!(layer.source, Some(crate::DataSource::Annotation(_))) { + return source_query.to_string(); + } + // Build SELECT list with aesthetic renames, casts let select_exprs = build_layer_select_list(layer, type_requirements); let select_clause = if select_exprs.is_empty() { @@ -565,3 +595,86 @@ where Ok(final_query) } + +/// Build a VALUES clause for an annotation layer with all aesthetic columns. +/// +/// Generates SQL like: `(VALUES (val1_row1, val2_row1), (val1_row2, val2_row2)) AS t(col1, col2)` +/// +/// This eliminates the need for dummy tables and CASE statements by directly +/// specifying all values in a single VALUES clause with proper column names. +/// +/// # Arguments +/// +/// * `layer` - The annotation layer with aesthetics as Literal values +/// * `n` - Number of rows (from DataSource::Annotation(n)) +/// +/// # Returns +/// +/// A complete SQL expression ready to use as a FROM clause +fn build_annotation_values_clause(layer: &Layer, n: usize) -> String { + use crate::plot::ArrayElement; + + // Collect all aesthetic mappings that are literals + // Convert scalars to single-element vectors for uniform handling + let mut columns: Vec> = Vec::new(); + let mut column_names = Vec::new(); + + for (aesthetic, value) in &layer.mappings.aesthetics { + if let crate::AestheticValue::Literal(param) = value { + let column_values = match param { + crate::plot::ParameterValue::Array(arr) => { + assert_eq!( + arr.len(), + n, + "Array length mismatch: expected {}, got {} for aesthetic '{}'", + n, + arr.len(), + aesthetic + ); + arr.clone() + } + crate::plot::ParameterValue::Number(num) => { + // Scalar number: replicate n times + vec![ArrayElement::Number(*num); n] + } + crate::plot::ParameterValue::String(s) => { + // Scalar string: replicate n times + vec![ArrayElement::String(s.clone()); n] + } + crate::plot::ParameterValue::Boolean(b) => { + // Scalar boolean: replicate n times + vec![ArrayElement::Boolean(*b); n] + } + crate::plot::ParameterValue::Null => { + // Null: replicate n times + vec![ArrayElement::Null; n] + } + }; + + columns.push(column_values); + column_names.push(naming::aesthetic_column(aesthetic)); + } + } + + // Build VALUES rows: (val1_row1, val2_row1), (val1_row2, val2_row2), ... + let mut rows = Vec::new(); + for i in 0..n { + let values: Vec = columns.iter().map(|col| col[i].to_sql()).collect(); + rows.push(format!("({})", values.join(", "))); + } + + // Build complete VALUES clause with column names + let values_clause = rows.join(", "); + let column_list = column_names + .iter() + .map(|c| format!("\"{}\"", c)) + .collect::>() + .join(", "); + + // Return a SELECT statement wrapping the VALUES clause + // This matches the format of regular layer queries and allows proper wrapping by schema queries + format!( + "SELECT * FROM (VALUES {}) AS t({})", + values_clause, column_list + ) +} diff --git a/src/plot/types.rs b/src/plot/types.rs index d5955b68..1861cdf8 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -889,8 +889,8 @@ impl ParameterValue { /// Convert this parameter value to a SQL literal string. /// - /// For arrays, generates a CASE WHEN statement that selects array elements by row number - /// using the __ggsql_dummy__ column. + /// Only supports scalar values (String, Number, Boolean, Null). + /// Arrays are handled separately in annotation layer VALUES clause generation. pub fn to_sql(&self) -> String { match self { ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), @@ -902,15 +902,8 @@ impl ParameterValue { "FALSE".to_string() } } - ParameterValue::Array(arr) => { - let mut case_stmt = String::from("CASE __ggsql_dummy__"); - for (i, elem) in arr.iter().enumerate() { - let row_num = i + 1; - let value_sql = elem.to_sql(); - case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); - } - case_stmt.push_str(" END"); - case_stmt + ParameterValue::Array(_) => { + panic!("ParameterValue::to_sql() does not support arrays. Arrays in annotation layers should be handled via VALUES clause generation.") } ParameterValue::Null => "NULL".to_string(), } From 58769019e046da1d9afca11c32fe165d1f52142f Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 14:23:09 +0100 Subject: [PATCH 12/26] Move array recycling from AST to execution layer Improves separation of concerns by keeping AST stateless: - Changed DataSource::Annotation from Annotation(usize) to Annotation (unit variant) - AST no longer carries execution state (row count) - Moved array recycling logic from process_annotation_layers() to build_annotation_values_clause() - Recycling now happens on-the-fly during SQL generation - Validates array lengths and returns error on mismatch - Simplified process_annotation_layers() from ~85 to ~40 lines The AST now purely describes what the user wrote, while execution handles data preparation and recycling. All 16 annotation tests pass. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 88 ++++++++++++++++++++++++++----------------- src/execute/mod.rs | 14 ++++--- src/parser/builder.rs | 6 +-- src/parser/mod.rs | 2 +- src/plot/layer/mod.rs | 2 +- src/plot/main.rs | 55 +++++---------------------- src/plot/types.rs | 10 ++--- 7 files changed, 81 insertions(+), 96 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index e60fe0a3..0f477005 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -25,11 +25,11 @@ pub fn layer_source_query( layer: &Layer, materialized_ctes: &HashSet, has_global: bool, -) -> String { +) -> Result { match &layer.source { - Some(crate::DataSource::Annotation(n)) => { - // Annotation layers: return complete VALUES clause - build_annotation_values_clause(layer, *n) + Some(crate::DataSource::Annotation) => { + // Annotation layers: return complete VALUES clause (with on-the-fly recycling) + build_annotation_values_clause(layer) } Some(crate::DataSource::Identifier(name)) => { // Regular table or CTE @@ -38,16 +38,16 @@ pub fn layer_source_query( } else { name.clone() }; - format!("SELECT * FROM {}", source) + Ok(format!("SELECT * FROM {}", source)) } Some(crate::DataSource::FilePath(path)) => { // File path source - format!("SELECT * FROM '{}'", path) + Ok(format!("SELECT * FROM '{}'", path)) } None => { // Layer uses global data debug_assert!(has_global, "Layer has no source and no global data"); - format!("SELECT * FROM {}", naming::global_table()) + Ok(format!("SELECT * FROM {}", naming::global_table())) } } } @@ -319,7 +319,7 @@ pub fn build_layer_base_query( ) -> String { // For annotation layers, source_query is already the VALUES clause from layer_source_query() // Just return it as-is - no wrapping, filtering, or casting needed - if matches!(layer.source, Some(crate::DataSource::Annotation(_))) { + if matches!(layer.source, Some(crate::DataSource::Annotation)) { return source_query.to_string(); } @@ -600,22 +600,47 @@ where /// /// Generates SQL like: `(VALUES (val1_row1, val2_row1), (val1_row2, val2_row2)) AS t(col1, col2)` /// -/// This eliminates the need for dummy tables and CASE statements by directly -/// specifying all values in a single VALUES clause with proper column names. +/// This function handles array recycling on-the-fly: +/// - Determines max array length from all literal aesthetics +/// - Replicates scalars to match max length +/// - Validates that all arrays have compatible lengths (1 or max) /// /// # Arguments /// /// * `layer` - The annotation layer with aesthetics as Literal values -/// * `n` - Number of rows (from DataSource::Annotation(n)) /// /// # Returns /// /// A complete SQL expression ready to use as a FROM clause -fn build_annotation_values_clause(layer: &Layer, n: usize) -> String { +fn build_annotation_values_clause(layer: &Layer) -> Result { use crate::plot::ArrayElement; - // Collect all aesthetic mappings that are literals - // Convert scalars to single-element vectors for uniform handling + // Step 1: Determine max array length from all literal aesthetics + let mut max_length = 1; + let mut array_lengths: Vec<(String, usize)> = Vec::new(); + + for (aesthetic, value) in &layer.mappings.aesthetics { + if let crate::AestheticValue::Literal(param) = value { + if let crate::plot::ParameterValue::Array(arr) = param { + let len = arr.len(); + if len > 1 { + array_lengths.push((aesthetic.clone(), len)); + if max_length > 1 && len != max_length { + // Multiple different non-1 lengths - error + return Err(GgsqlError::ValidationError(format!( + "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", + aesthetic, len, max_length + ))); + } + if len > max_length { + max_length = len; + } + } + } + } + } + + // Step 2: Collect all aesthetic mappings and recycle scalars to max_length let mut columns: Vec> = Vec::new(); let mut column_names = Vec::new(); @@ -623,31 +648,24 @@ fn build_annotation_values_clause(layer: &Layer, n: usize) -> String { if let crate::AestheticValue::Literal(param) = value { let column_values = match param { crate::plot::ParameterValue::Array(arr) => { - assert_eq!( - arr.len(), - n, - "Array length mismatch: expected {}, got {} for aesthetic '{}'", - n, - arr.len(), - aesthetic - ); + // Arrays: use as-is (already validated to be compatible) arr.clone() } crate::plot::ParameterValue::Number(num) => { - // Scalar number: replicate n times - vec![ArrayElement::Number(*num); n] + // Scalar number: replicate to max_length + vec![ArrayElement::Number(*num); max_length] } crate::plot::ParameterValue::String(s) => { - // Scalar string: replicate n times - vec![ArrayElement::String(s.clone()); n] + // Scalar string: replicate to max_length + vec![ArrayElement::String(s.clone()); max_length] } crate::plot::ParameterValue::Boolean(b) => { - // Scalar boolean: replicate n times - vec![ArrayElement::Boolean(*b); n] + // Scalar boolean: replicate to max_length + vec![ArrayElement::Boolean(*b); max_length] } crate::plot::ParameterValue::Null => { - // Null: replicate n times - vec![ArrayElement::Null; n] + // Null: replicate to max_length + vec![ArrayElement::Null; max_length] } }; @@ -656,14 +674,14 @@ fn build_annotation_values_clause(layer: &Layer, n: usize) -> String { } } - // Build VALUES rows: (val1_row1, val2_row1), (val1_row2, val2_row2), ... + // Step 3: Build VALUES rows: (val1_row1, val2_row1), (val1_row2, val2_row2), ... let mut rows = Vec::new(); - for i in 0..n { + for i in 0..max_length { let values: Vec = columns.iter().map(|col| col[i].to_sql()).collect(); rows.push(format!("({})", values.join(", "))); } - // Build complete VALUES clause with column names + // Step 4: Build complete VALUES clause with column names let values_clause = rows.join(", "); let column_list = column_names .iter() @@ -673,8 +691,8 @@ fn build_annotation_values_clause(layer: &Layer, n: usize) -> String { // Return a SELECT statement wrapping the VALUES clause // This matches the format of regular layer queries and allows proper wrapping by schema queries - format!( + Ok(format!( "SELECT * FROM (VALUES {}) AS t({})", values_clause, column_list - ) + )) } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 6de7bda1..df76e821 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -154,7 +154,7 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { // Skip annotation layers - they don't inherit global mappings - if matches!(layer.source, Some(DataSource::Annotation(_))) { + if matches!(layer.source, Some(DataSource::Annotation)) { continue; } @@ -936,11 +936,12 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result = specs[0] .layers .iter() .map(|l| layer::layer_source_query(l, &materialized_ctes, has_global_table)) - .collect(); + .collect::>>()?; // Get types for each layer from source queries (Phase 1: types only, no min/max yet) let mut layer_type_info: Vec> = Vec::new(); @@ -2246,7 +2247,7 @@ mod tests { let annotation_layer = &result.specs[0].layers[1]; assert_eq!(annotation_layer.geom, crate::Geom::text()); assert!( - matches!(annotation_layer.source, Some(DataSource::Annotation(_))), + matches!(annotation_layer.source, Some(DataSource::Annotation)), "PLACE layer should have Annotation source" ); @@ -2394,13 +2395,14 @@ mod tests { let annotation_layer = &result.specs[0].layers[1]; assert_eq!( annotation_layer.source, - Some(DataSource::Annotation(3)), - "Should recycle scalar y to length 3" + Some(DataSource::Annotation), + "Should be an annotation layer" ); + // Verify recycling happened: scalar y should be recycled to match array x length (3) let annotation_key = annotation_layer.data_key.as_ref().unwrap(); let annotation_df = result.data.get(annotation_key).unwrap(); - assert_eq!(annotation_df.height(), 3, "Should have 3 rows"); + assert_eq!(annotation_df.height(), 3, "Should have 3 rows after recycling"); } #[cfg(feature = "duckdb")] diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 6f82f1c7..736bb7e2 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -507,9 +507,9 @@ fn build_place_layer(node: &Node, source: &SourceTree) -> Result { // Build the layer using standard logic let mut layer = build_layer(node, source)?; - // Mark as annotation layer with initial length of 1 - // The length may be updated in process_annotation_layers() if arrays are present - layer.source = Some(DataSource::Annotation(1)); + // Mark as annotation layer + // Array recycling happens later during SQL generation in build_annotation_values_clause() + layer.source = Some(DataSource::Annotation); Ok(layer) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2979a51e..5e9af460 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -440,7 +440,7 @@ mod tests { // Second layer: PLACE text with annotation source assert_eq!(specs[0].layers[1].geom, Geom::text()); assert!( - matches!(specs[0].layers[1].source, Some(DataSource::Annotation(_))), + matches!(specs[0].layers[1].source, Some(DataSource::Annotation)), "PLACE layer should have Annotation source" ); diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index 5c7e80ab..2aafe642 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -241,7 +241,7 @@ impl Layer { use crate::naming; use crate::plot::aesthetic::is_positional_aesthetic; - let is_annotation = matches!(self.source, Some(crate::DataSource::Annotation(_))); + let is_annotation = matches!(self.source, Some(crate::DataSource::Annotation)); for (aesthetic, value) in self.mappings.aesthetics.iter_mut() { let aes_col_name = naming::aesthetic_column(aesthetic); diff --git a/src/plot/main.rs b/src/plot/main.rs index 390b37e2..fb144021 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -206,49 +206,25 @@ impl Plot { } /// Process annotation layers by moving positional and required aesthetics - /// from parameters to mappings, with array recycling support. + /// from parameters to mappings. /// /// This must be called AFTER transform_aesthetics_to_internal() so that /// parameter keys are already in internal space (pos1, pos2, etc.). /// - /// Array recycling rules: - /// - Scalars and length-1 arrays are recycled to match the max array length - /// - All non-1 arrays must have the same length (error on mismatch) - /// - Only required/positional aesthetics that are moved to mappings get recycled - /// /// Positional aesthetics need to be in mappings so they go through the same /// transformation pipeline (internal→user space) as data-mapped aesthetics, /// enabling them to participate in scale training and coordinate transformations. + /// + /// Note: Array recycling (replicating scalars to match array lengths) happens later + /// during SQL generation in `build_annotation_values_clause()`. pub fn process_annotation_layers(&mut self) -> Result<(), crate::GgsqlError> { for layer in &mut self.layers { - // Get initial length from DataSource - let mut max_length = match &layer.source { - Some(DataSource::Annotation(n)) => *n, - _ => continue, - }; - - // Step 1: Determine max array length from all parameters - let mut array_lengths: std::collections::HashMap = - std::collections::HashMap::new(); - - for (param_name, value) in &layer.parameters { - if let ParameterValue::Array(arr) = value { - let len = arr.len(); - array_lengths.insert(param_name.clone(), len); - if len > 1 && len != max_length { - if max_length > 1 { - // Multiple different non-1 lengths - error - return Err(crate::GgsqlError::ValidationError(format!( - "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", - param_name, len, max_length - ))); - } - max_length = len; - } - } + // Skip non-annotation layers + if !matches!(&layer.source, Some(DataSource::Annotation)) { + continue; } - // Step 2: Move required/positional/array aesthetics to mappings with recycling + // Move required/positional/array aesthetics to mappings let required_aesthetics = layer.geom.aesthetics().required(); let param_keys: Vec = layer.parameters.keys().cloned().collect(); @@ -267,25 +243,14 @@ impl Plot { continue; } - // Move from parameters to mappings with recycling + // Move from parameters to mappings (no recycling - happens later in SQL generation) if let Some(value) = layer.parameters.remove(¶m_name) { - let recycled_value = if max_length > 1 { - value.rep(max_length)? - } else { - value - }; - layer .mappings - .insert(¶m_name, AestheticValue::Literal(recycled_value)); + .insert(¶m_name, AestheticValue::Literal(value)); } } } - - // Step 3: Update DataSource with the correct length - if max_length > 1 { - layer.source = Some(DataSource::Annotation(max_length)); - } } Ok(()) diff --git a/src/plot/types.rs b/src/plot/types.rs index 1861cdf8..72af3ce7 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,9 +140,9 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), - /// Annotation layer (PLACE clause) with number of annotation rows - /// The usize represents how many rows to generate (from array expansion) - Annotation(usize), + /// Annotation layer (PLACE clause) + /// Row count and array recycling handled during SQL generation + Annotation, } impl DataSource { @@ -151,7 +151,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, - DataSource::Annotation(_) => "__annotation__", + DataSource::Annotation => "__annotation__", } } @@ -162,7 +162,7 @@ impl DataSource { /// Returns true if this is an annotation layer source pub fn is_annotation(&self) -> bool { - matches!(self, DataSource::Annotation(_)) + matches!(self, DataSource::Annotation) } } From 6aa345d7e0bd3c153ee2014c008f3a8a58467122 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 14:52:06 +0100 Subject: [PATCH 13/26] Replace is_scaled flag with AnnotationColumn variant Implements a cleaner approach for handling annotation aesthetics: - Added AnnotationColumn variant to AestheticValue enum - AnnotationColumn used only for non-positional annotation aesthetics (color, size, etc) - Positional annotation aesthetics (x, y) use regular Column variant - Removed is_scaled field from Column variant - Scale training automatically skips AnnotationColumn variants - Vega-Lite writer uses identity scale for AnnotationColumn This makes the distinction explicit in the type system rather than carrying state in a boolean flag. Positional annotations participate in scale training (data coordinate space) while non-positional annotations use identity scales (visual space). All 16 annotation tests pass. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 5 ++-- src/execute/mod.rs | 1 - src/execute/scale.rs | 17 ++++------- src/execute/schema.rs | 2 +- src/plot/layer/mod.rs | 20 +++++++++---- src/plot/types.rs | 52 +++++++++++++++++---------------- src/writer/vegalite/encoding.rs | 7 +++-- 7 files changed, 56 insertions(+), 48 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 0f477005..e1837b8a 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -96,7 +96,7 @@ pub fn build_layer_select_list( for (aesthetic, value) in &layer.mappings.aesthetics { let aes_col_name = naming::aesthetic_column(aesthetic); let select_expr = match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { // Check if this column needs casting if let Some(req) = cast_map.get(name.as_str()) { // Cast and rename to prefixed aesthetic name @@ -144,7 +144,7 @@ pub fn apply_remappings_post_query(df: DataFrame, layer: &Layer) -> Result { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { // Check if this stat column exists in the DataFrame if df.column(name).is_ok() { df.rename(name, target_col_name.into()).map_err(|e| { @@ -528,7 +528,6 @@ where name: prefixed_name, original_name, is_dummy, - is_scaled: true, }; layer.mappings.insert(aesthetic.clone(), value); } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index df76e821..8e83b699 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -308,7 +308,6 @@ fn add_facet_mappings_to_layers( name: var.to_string(), original_name: Some(var.to_string()), is_dummy: false, - is_scaled: true, }, ); } diff --git a/src/execute/scale.rs b/src/execute/scale.rs index b8127e90..1c7e9364 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -529,7 +529,7 @@ pub fn find_schema_columns_for_aesthetic( for aes_name in &aesthetics_to_check { if let Some(value) = layer.mappings.get(aes_name) { match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { if let Some(info) = schema.iter().find(|c| c.name == *name) { infos.push(info.clone()); } @@ -1005,18 +1005,13 @@ pub fn find_columns_for_aesthetic<'a>( for (i, layer) in layers.iter().enumerate() { if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { - if let Some(AestheticValue::Column { - name, is_scaled, .. - }) = layer.mappings.get(aes_name) - { - // Only train scales for columns that should be scaled - // Annotation layer literals have is_scaled: false - if *is_scaled { - if let Ok(column) = df.column(name) { - column_refs.push(column); - } + if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { + // Regular columns (data and positional annotations) participate in scale training + if let Ok(column) = df.column(name) { + column_refs.push(column); } } + // AnnotationColumn and Literal don't participate in scale training } } } diff --git a/src/execute/schema.rs b/src/execute/schema.rs index c59b390a..4e2ad1f0 100644 --- a/src/execute/schema.rs +++ b/src/execute/schema.rs @@ -301,7 +301,7 @@ pub fn build_aesthetic_schema(layer: &Layer, schema: &Schema) -> Schema { for (aesthetic, value) in &layer.mappings.aesthetics { let aes_col_name = naming::aesthetic_column(aesthetic); match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { // The schema already has aesthetic-prefixed column names from build_layer_base_query, // so we look up by aesthetic name, not the original column name. // Fall back to original name for backwards compatibility with older schemas. diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index 2aafe642..d2bda03d 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -258,12 +258,18 @@ impl Layer { // Column is now named with the prefixed aesthetic name *name = aes_col_name; } + AestheticValue::AnnotationColumn { name } => { + // AnnotationColumn already has identity scale behavior, just update name + *name = aes_col_name; + } AestheticValue::Literal(_) => { // Literals become columns with prefixed aesthetic name - // For annotation layers, non-positional aesthetics should use identity scales + // For annotation layers: + // - Positional aesthetics (x, y): use Column (data coordinate space, participate in scales) + // - Non-positional aesthetics (color, size): use AnnotationColumn (visual space, identity scale) let is_positional = is_positional_aesthetic(aesthetic); *value = if is_annotation && !is_positional { - AestheticValue::identity_column(aes_col_name) + AestheticValue::annotation_column(aes_col_name) } else { AestheticValue::standard_column(aes_col_name) }; @@ -293,7 +299,6 @@ impl Layer { AestheticValue::Column { original_name, is_dummy, - is_scaled, .. } => { // Use the stat name from remappings as the original_name for labels @@ -302,7 +307,13 @@ impl Layer { name: prefixed_name, original_name: original_name.clone(), is_dummy: *is_dummy, - is_scaled: *is_scaled, + } + } + AestheticValue::AnnotationColumn { .. } => { + // Annotation columns can be remapped (e.g., stat transforms on annotation data) + // They remain annotation columns (identity scale) + AestheticValue::AnnotationColumn { + name: prefixed_name, } } AestheticValue::Literal(_) => { @@ -312,7 +323,6 @@ impl Layer { name: prefixed_name, original_name: None, is_dummy: false, - is_scaled: true, } } }; diff --git a/src/plot/types.rs b/src/plot/types.rs index 72af3ce7..b3e82039 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -173,7 +173,7 @@ impl DataSource { /// Value for aesthetic mappings #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum AestheticValue { - /// Column reference + /// Column reference from data source Column { name: String, /// Original column name before internal renaming (for labels) @@ -182,33 +182,25 @@ pub enum AestheticValue { original_name: Option, /// Whether this is a dummy/placeholder column (e.g., for bar charts without x mapped) is_dummy: bool, - /// Whether scales should be applied to this column - /// Set to false for annotation layer literals (e.g., stroke => ['red', 'blue']) - /// that are converted to columns but should use identity scales - is_scaled: bool, + }, + /// Annotation column for non-positional aesthetics (synthesized from PLACE literals) + /// These columns are generated from user-specified literal values in visual space + /// (e.g., color => 'red', size => 10) and use identity scales (no transformation). + /// Positional annotations (x, y) use Column instead since they're in data coordinate space. + AnnotationColumn { + name: String, }, /// Literal value (quoted string, number, or boolean) Literal(ParameterValue), } impl AestheticValue { - /// Create a column mapping + /// Create a standard column mapping pub fn standard_column(name: impl Into) -> Self { Self::Column { name: name.into(), original_name: None, is_dummy: false, - is_scaled: true, - } - } - - /// Create a column mapping with identity scale (no scale transformation) - pub fn identity_column(name: impl Into) -> Self { - Self::Column { - name: name.into(), - original_name: None, - is_dummy: false, - is_scaled: false, } } @@ -218,7 +210,6 @@ impl AestheticValue { name: name.into(), original_name: None, is_dummy: true, - is_scaled: true, } } @@ -231,14 +222,20 @@ impl AestheticValue { name: name.into(), original_name: Some(original_name.into()), is_dummy: false, - is_scaled: true, + } + } + + /// Create an annotation column mapping (synthesized from PLACE literals) + pub fn annotation_column(name: impl Into) -> Self { + Self::AnnotationColumn { + name: name.into(), } } /// Get column name if this is a column mapping pub fn column_name(&self) -> Option<&str> { match self { - Self::Column { name, .. } => Some(name), + Self::Column { name, .. } | Self::AnnotationColumn { name } => Some(name), _ => None, } } @@ -255,16 +252,19 @@ impl AestheticValue { original_name, .. } => Some(original_name.as_deref().unwrap_or(name)), + Self::AnnotationColumn { name } => Some(name), _ => None, } } /// Check if this is a dummy/placeholder column pub fn is_dummy(&self) -> bool { - match self { - Self::Column { is_dummy, .. } => *is_dummy, - _ => false, - } + matches!(self, Self::Column { is_dummy: true, .. }) + } + + /// Check if this is an annotation column + pub fn is_annotation(&self) -> bool { + matches!(self, Self::AnnotationColumn { .. }) } /// Check if this is a literal value (not a column mapping) @@ -276,7 +276,9 @@ impl AestheticValue { impl std::fmt::Display for AestheticValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - AestheticValue::Column { name, .. } => write!(f, "{}", name), + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { + write!(f, "{}", name) + } AestheticValue::Literal(lit) => write!(f, "{}", lit), } } diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index d6f53d2f..52b53260 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -802,8 +802,11 @@ pub(super) fn build_encoding_channel( name: col, original_name, is_dummy, - is_scaled, - } => build_column_encoding(aesthetic, col, original_name, *is_dummy, *is_scaled, ctx), + } => build_column_encoding(aesthetic, col, original_name, *is_dummy, false, ctx), + AestheticValue::AnnotationColumn { name: col } => { + // Non-positional annotation columns use identity scale + build_column_encoding(aesthetic, col, &None, false, true, ctx) + } AestheticValue::Literal(lit) => build_literal_encoding(aesthetic, lit), } } From 82bfe3841254f0e172b52675f5e781e393e1be71 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 15:10:25 +0100 Subject: [PATCH 14/26] Filter NULL aesthetics in annotation layers NULL aesthetics in PLACE SETTING mean "use geom default", not "create a mapping". Filter them out in process_annotation_layers() to prevent them from reaching VALUES clause generation or literal_to_series(). Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 6 +++--- src/plot/main.rs | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index e1837b8a..8f92c246 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -183,7 +183,7 @@ pub fn literal_to_series(name: &str, lit: &ParameterValue, len: usize) -> polars ParameterValue::String(s) => Series::new(name.into(), vec![s.as_str(); len]), ParameterValue::Boolean(b) => Series::new(name.into(), vec![*b; len]), ParameterValue::Array(_) | ParameterValue::Null => { - unreachable!("Grammar prevents arrays and null in literal aesthetic mappings") + unreachable!("Arrays are never moved to mappings; NULL is filtered in process_annotation_layers()") } } } @@ -663,8 +663,8 @@ fn build_annotation_values_clause(layer: &Layer) -> Result { vec![ArrayElement::Boolean(*b); max_length] } crate::plot::ParameterValue::Null => { - // Null: replicate to max_length - vec![ArrayElement::Null; max_length] + // NULL aesthetics are filtered in process_annotation_layers() + unreachable!("NULL is filtered before reaching annotation VALUES clause") } }; diff --git a/src/plot/main.rs b/src/plot/main.rs index fb144021..2efb7fe1 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -245,9 +245,12 @@ impl Plot { // Move from parameters to mappings (no recycling - happens later in SQL generation) if let Some(value) = layer.parameters.remove(¶m_name) { - layer - .mappings - .insert(¶m_name, AestheticValue::Literal(value)); + // Filter out NULL aesthetics - they mean "use geom default" + if !value.is_null() { + layer + .mappings + .insert(¶m_name, AestheticValue::Literal(value)); + } } } } From 00337d52a687a4c9c7c374f7ae8186db5714cf8e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 15:45:41 +0100 Subject: [PATCH 15/26] Add unit tests for annotation layer functions Reorganize test suite to replace integration tests with unit tests where appropriate, improving test speed and clarity. Added unit tests: - src/execute/layer.rs: 5 tests for build_annotation_values_clause() * Single scalar values * Array recycling * Mismatched array lengths (error case) * Multiple arrays with same length * Mixed types (number, string, boolean) - src/plot/main.rs: 6 tests for process_annotation_layers() * Moves positional aesthetics to mappings * Moves required aesthetics to mappings * Moves array parameters to mappings * Filters NULL aesthetics * Skips non-annotation layers * Preserves existing mappings Removed redundant integration tests: - test_place_array_recycling_scalar (now unit test) - test_place_array_mismatched_lengths_error (now unit test) All 12 annotation-related tests pass. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 152 +++++++++++++++++++++++++++++++++++++++++++ src/execute/mod.rs | 55 +--------------- src/plot/main.rs | 121 ++++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 54 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 8f92c246..314cdd0d 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -695,3 +695,155 @@ fn build_annotation_values_clause(layer: &Layer) -> Result { values_clause, column_list )) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::plot::{AestheticValue, ArrayElement, DataSource, Geom, Layer, ParameterValue}; + + #[test] + fn test_build_annotation_values_clause_single_scalar() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer + .mappings + .insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); + layer + .mappings + .insert("pos2", AestheticValue::Literal(ParameterValue::Number(10.0))); + layer.mappings.insert( + "label", + AestheticValue::Literal(ParameterValue::String("Test".to_string())), + ); + + let result = build_annotation_values_clause(&layer).unwrap(); + + // Should produce: SELECT * FROM (VALUES (...)) AS t("__ggsql_aes_pos1__", ...) + assert!(result.contains("VALUES")); + // Check all values are present (order may vary due to HashMap) + assert!(result.contains("5")); + assert!(result.contains("10")); + assert!(result.contains("'Test'")); + assert!(result.contains("__ggsql_aes_pos1__")); + assert!(result.contains("__ggsql_aes_pos2__")); + assert!(result.contains("__ggsql_aes_label__")); + } + + #[test] + fn test_build_annotation_values_clause_array_recycling() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.mappings.insert( + "pos1", + AestheticValue::Literal(ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ArrayElement::Number(3.0), + ])), + ); + layer + .mappings + .insert("pos2", AestheticValue::Literal(ParameterValue::Number(10.0))); + layer.mappings.insert( + "label", + AestheticValue::Literal(ParameterValue::String("Same".to_string())), + ); + + let result = build_annotation_values_clause(&layer).unwrap(); + + // Should recycle scalar pos2 and label to match array length (3) + assert!(result.contains("VALUES")); + // Check that all values appear (order may vary due to HashMap) + assert!(result.contains("1") && result.contains("2") && result.contains("3")); + assert!(result.contains("10")); + assert!(result.contains("'Same'")); + // Check row count by counting parentheses pairs in VALUES + assert_eq!(result.matches("), (").count() + 1, 3, "Should have 3 rows"); + } + + #[test] + fn test_build_annotation_values_clause_mismatched_arrays() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.mappings.insert( + "pos1", + AestheticValue::Literal(ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ArrayElement::Number(3.0), + ])), + ); + layer.mappings.insert( + "pos2", + AestheticValue::Literal(ParameterValue::Array(vec![ + ArrayElement::Number(10.0), + ArrayElement::Number(20.0), + ])), + ); + + let result = build_annotation_values_clause(&layer); + + // Should error with mismatched lengths + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!(err_msg.contains("mismatched array lengths"), "Error message should mention mismatched arrays"); + // Error should mention one of the aesthetics (order may vary) + assert!( + err_msg.contains("pos1") || err_msg.contains("pos2"), + "Error message should mention at least one aesthetic" + ); + } + + #[test] + fn test_build_annotation_values_clause_multiple_arrays_same_length() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.mappings.insert( + "pos1", + AestheticValue::Literal(ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ])), + ); + layer.mappings.insert( + "pos2", + AestheticValue::Literal(ParameterValue::Array(vec![ + ArrayElement::Number(10.0), + ArrayElement::Number(20.0), + ])), + ); + + let result = build_annotation_values_clause(&layer).unwrap(); + + // Both arrays have length 2, should work (order may vary) + assert!(result.contains("VALUES")); + assert!(result.contains("1") && result.contains("2")); + assert!(result.contains("10") && result.contains("20")); + assert_eq!(result.matches("), (").count() + 1, 2, "Should have 2 rows"); + } + + #[test] + fn test_build_annotation_values_clause_mixed_types() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer + .mappings + .insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); + layer.mappings.insert( + "label", + AestheticValue::Literal(ParameterValue::String("Text".to_string())), + ); + layer.mappings.insert( + "visible", + AestheticValue::Literal(ParameterValue::Boolean(true)), + ); + + let result = build_annotation_values_clause(&layer).unwrap(); + + // Should handle different types (order may vary) + assert!(result.contains("VALUES")); + assert!(result.contains("5")); + assert!(result.contains("'Text'")); + assert!(result.contains("TRUE") || result.contains("true"), "Should contain boolean value"); + } +} diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 8e83b699..5a173011 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2377,60 +2377,6 @@ mod tests { } } - #[cfg(feature = "duckdb")] - #[test] - fn test_place_array_recycling_scalar() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let query = r#" - SELECT 1 AS x, 10 AS y - VISUALISE x, y - DRAW point - PLACE text SETTING x => [1, 2, 3], y => 10, label => 'Same' - "#; - - let result = prepare_data_with_reader(query, &reader).unwrap(); - - let annotation_layer = &result.specs[0].layers[1]; - assert_eq!( - annotation_layer.source, - Some(DataSource::Annotation), - "Should be an annotation layer" - ); - - // Verify recycling happened: scalar y should be recycled to match array x length (3) - let annotation_key = annotation_layer.data_key.as_ref().unwrap(); - let annotation_df = result.data.get(annotation_key).unwrap(); - assert_eq!(annotation_df.height(), 3, "Should have 3 rows after recycling"); - } - - #[cfg(feature = "duckdb")] - #[test] - fn test_place_array_mismatched_lengths_error() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let query = r#" - SELECT 1 AS x, 10 AS y - VISUALISE x, y - DRAW point - PLACE text SETTING x => [1, 2, 3], y => [10, 20], label => 'Test' - "#; - - let result = prepare_data_with_reader(query, &reader); - assert!(result.is_err(), "Should error on mismatched array lengths"); - - match result { - Err(GgsqlError::ValidationError(msg)) => { - assert!( - msg.contains("mismatched array lengths"), - "Error should mention mismatched lengths: {}", - msg - ); - } - Err(e) => panic!("Expected ValidationError, got: {}", e), - Ok(_) => panic!("Expected error, got success"), - } - } #[cfg(feature = "duckdb")] #[test] @@ -2471,3 +2417,4 @@ mod tests { ); } } + diff --git a/src/plot/main.rs b/src/plot/main.rs index 2efb7fe1..2f6d0c55 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -791,4 +791,125 @@ mod tests { "Non-color aesthetic should keep its name" ); } + + #[test] + fn test_process_annotation_layers_moves_positional_aesthetics() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer.parameters.insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert("size".to_string(), ParameterValue::Number(14.0)); + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // Positional aesthetics should be moved to mappings + assert!(plot.layers[0].mappings.contains_key("pos1")); + assert!(plot.layers[0].mappings.contains_key("pos2")); + // Non-positional should stay in parameters + assert!(plot.layers[0].parameters.contains_key("size")); + assert!(!plot.layers[0].mappings.contains_key("size")); + } + + #[test] + fn test_process_annotation_layers_moves_required_aesthetics() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer.parameters.insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert("label".to_string(), ParameterValue::String("Text".to_string())); + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // pos1 and pos2 are required and positional - should be moved to mappings + assert!(plot.layers[0].mappings.contains_key("pos1")); + assert!(plot.layers[0].mappings.contains_key("pos2")); + // label is optional (not required) - should stay in parameters + assert!(!plot.layers[0].mappings.contains_key("label")); + assert!(plot.layers[0].parameters.contains_key("label")); + } + + #[test] + fn test_process_annotation_layers_moves_array_parameters() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert("pos1".to_string(), ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ])); + layer.parameters.insert("stroke".to_string(), ParameterValue::Array(vec![ + ArrayElement::String("red".to_string()), + ArrayElement::String("blue".to_string()), + ])); + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // Array parameters should be moved to mappings (even non-positional) + assert!(plot.layers[0].mappings.contains_key("pos1")); + assert!(plot.layers[0].mappings.contains_key("stroke")); + assert!(!plot.layers[0].parameters.contains_key("pos1")); + assert!(!plot.layers[0].parameters.contains_key("stroke")); + } + + #[test] + fn test_process_annotation_layers_filters_null_aesthetics() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer.parameters.insert("pos2".to_string(), ParameterValue::Null); // NULL should be filtered + layer.parameters.insert("label".to_string(), ParameterValue::String("Text".to_string())); + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // pos1 should be moved to mappings (positional) + assert!(plot.layers[0].mappings.contains_key("pos1")); + // label should stay in parameters (optional, not positional) + assert!(!plot.layers[0].mappings.contains_key("label")); + assert!(plot.layers[0].parameters.contains_key("label")); + // NULL pos2 should not be in mappings (filtered out) + assert!(!plot.layers[0].mappings.contains_key("pos2")); + // NULL should also be removed from parameters + assert!(!plot.layers[0].parameters.contains_key("pos2")); + } + + #[test] + fn test_process_annotation_layers_skips_non_annotation_layers() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::point()); + layer.source = None; // Regular layer + layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // Parameters should not be moved for non-annotation layers + assert!(plot.layers[0].parameters.contains_key("pos1")); + assert!(!plot.layers[0].mappings.contains_key("pos1")); + } + + #[test] + fn test_process_annotation_layers_keeps_existing_mappings() { + let mut plot = Plot::new(); + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.mappings.insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); + layer.parameters.insert("pos1".to_string(), ParameterValue::Number(10.0)); // Should be ignored + plot.layers.push(layer); + + plot.process_annotation_layers().unwrap(); + + // Existing mapping should be preserved (not overwritten) + if let Some(AestheticValue::Literal(ParameterValue::Number(n))) = plot.layers[0].mappings.get("pos1") { + assert_eq!(*n, 5.0, "Should keep original mapping value"); + } else { + panic!("pos1 should be a literal number"); + } + } } From 46e46fe28da7f0e6f5e4062ef009a37808c198f4 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 9 Mar 2026 15:37:17 +0100 Subject: [PATCH 16/26] Fix annotation layers to work with stat geoms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotation layers now support stat geoms (histogram, density, etc.) by using the same column→aesthetic renaming pipeline as regular layers. Previously, annotation layers created prefixed aesthetic columns directly (__ggsql_aes_pos1__), causing naming conflicts when stat transforms tried to rename their output columns to the same names. Changes: - Use raw aesthetic names in VALUES clause (pos1, not __ggsql_aes_pos1__) - Move annotation processing from parse-time to execution-time - Rename build_annotation_values_clause() to process_annotation_layer() - Fix is_scaled parameter (was inverted for Column vs AnnotationColumn) - Make label required for text geom - Flatten nested conditionals with early exits - Remove obsolete Plot::process_annotation_layers() Positional aesthetics now correctly participate in scale training with data-derived domains, while non-positional aesthetics use identity scales. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 2 +- src/execute/layer.rs | 313 ++++++++++++++++++-------------- src/execute/mod.rs | 96 ++++++++-- src/execute/scale.rs | 3 +- src/parser/builder.rs | 9 +- src/parser/mod.rs | 29 +-- src/plot/layer/geom/text.rs | 2 +- src/plot/main.rs | 165 +---------------- src/plot/types.rs | 8 +- src/writer/vegalite/encoding.rs | 4 +- 10 files changed, 291 insertions(+), 340 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index fd56eb7a..7d40989e 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -3,9 +3,9 @@ //! This module handles determining which columns need type casting based on //! scale requirements and updating type info accordingly. +use crate::naming; use crate::plot::scale::coerce_dtypes; use crate::plot::{CastTargetType, Plot, SqlTypeNames}; -use crate::naming; use polars::prelude::{DataType, TimeUnit}; use std::collections::HashMap; diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 314cdd0d..4017f5b0 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -17,19 +17,22 @@ use super::schema::build_aesthetic_schema; /// Build the source query for a layer. /// /// Returns a complete query that can be executed to retrieve the layer's data: -/// - Annotation layers → VALUES clause with all aesthetic columns +/// - Annotation layers → VALUES clause with all aesthetic columns (modifies layer mappings) /// - Table/CTE layers → `SELECT * FROM table_or_cte` /// - File path layers → `SELECT * FROM 'path'` /// - Layers without explicit source → `SELECT * FROM __ggsql_global__` +/// +/// For annotation layers, this function processes parameters and converts them to +/// Column/AnnotationColumn mappings, so the layer is modified in place. pub fn layer_source_query( - layer: &Layer, + layer: &mut Layer, materialized_ctes: &HashSet, has_global: bool, ) -> Result { match &layer.source { Some(crate::DataSource::Annotation) => { - // Annotation layers: return complete VALUES clause (with on-the-fly recycling) - build_annotation_values_clause(layer) + // Annotation layers: process parameters and return complete VALUES clause (with on-the-fly recycling) + process_annotation_layer(layer) } Some(crate::DataSource::Identifier(name)) => { // Regular table or CTE @@ -317,11 +320,9 @@ pub fn build_layer_base_query( source_query: &str, type_requirements: &[TypeRequirement], ) -> String { - // For annotation layers, source_query is already the VALUES clause from layer_source_query() - // Just return it as-is - no wrapping, filtering, or casting needed - if matches!(layer.source, Some(crate::DataSource::Annotation)) { - return source_query.to_string(); - } + // Annotation layers now go through the same pipeline as regular layers. + // The source_query for annotations is a VALUES clause with raw column names, + // and this function wraps it with SELECT expressions that rename to prefixed aesthetic names. // Build SELECT list with aesthetic renames, casts let select_exprs = build_layer_select_list(layer, type_requirements); @@ -599,88 +600,133 @@ where /// /// Generates SQL like: `(VALUES (val1_row1, val2_row1), (val1_row2, val2_row2)) AS t(col1, col2)` /// -/// This function handles array recycling on-the-fly: -/// - Determines max array length from all literal aesthetics -/// - Replicates scalars to match max length -/// - Validates that all arrays have compatible lengths (1 or max) +/// This function: +/// 1. Moves positional/required/array parameters from layer.parameters to layer.mappings +/// 2. Handles array recycling on-the-fly (determines max length, replicates scalars) +/// 3. Validates that all arrays have compatible lengths (1 or max) +/// 4. Builds the VALUES clause with raw aesthetic column names +/// 5. Converts parameter values to Column/AnnotationColumn mappings +/// +/// For annotation layers: +/// - Positional aesthetics (pos1, pos2): use Column (data coordinate space, participate in scales) +/// - Non-positional aesthetics (color, size): use AnnotationColumn (visual space, identity scale) /// /// # Arguments /// -/// * `layer` - The annotation layer with aesthetics as Literal values +/// * `layer` - The annotation layer with aesthetics in parameters (will be modified) /// /// # Returns /// /// A complete SQL expression ready to use as a FROM clause -fn build_annotation_values_clause(layer: &Layer) -> Result { +fn process_annotation_layer(layer: &mut Layer) -> Result { use crate::plot::ArrayElement; - // Step 1: Determine max array length from all literal aesthetics + // Step 1: Identify which parameters to use for annotation data + // Only process positional aesthetics, required aesthetics, and array parameters + // (non-positional non-required scalars stay in parameters as geom settings) + let required_aesthetics = layer.geom.aesthetics().required(); + let param_keys: Vec = layer.parameters.keys().cloned().collect(); + + // Collect parameters we'll use, checking criteria and filtering NULLs + let mut annotation_params: Vec<(String, ParameterValue)> = Vec::new(); + + for param_name in param_keys { + // Skip if already in mappings + if layer.mappings.contains_key(¶m_name) { + continue; + } + + let Some(value) = layer.parameters.get(¶m_name) else { + continue; + }; + + // Filter out NULL aesthetics - they mean "use geom default" + if value.is_null() { + continue; + } + + // Check if this is a positional aesthetic OR a required aesthetic OR an array + let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); + let is_required = required_aesthetics.contains(¶m_name.as_str()); + let is_array = matches!(value, ParameterValue::Array(_)); + + // Only process positional/required/array parameters + if is_positional || is_required || is_array { + annotation_params.push((param_name.clone(), value.clone())); + } + } + + // Step 2: Determine max array length from all annotation parameters let mut max_length = 1; - let mut array_lengths: Vec<(String, usize)> = Vec::new(); - for (aesthetic, value) in &layer.mappings.aesthetics { - if let crate::AestheticValue::Literal(param) = value { - if let crate::plot::ParameterValue::Array(arr) = param { - let len = arr.len(); - if len > 1 { - array_lengths.push((aesthetic.clone(), len)); - if max_length > 1 && len != max_length { - // Multiple different non-1 lengths - error - return Err(GgsqlError::ValidationError(format!( - "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", - aesthetic, len, max_length - ))); - } - if len > max_length { - max_length = len; - } - } - } + for (aesthetic, value) in &annotation_params { + // Only check array values + let ParameterValue::Array(arr) = value else { + continue; + }; + + let len = arr.len(); + if len <= 1 { + continue; + } + + if max_length > 1 && len != max_length { + // Multiple different non-1 lengths - error + return Err(GgsqlError::ValidationError(format!( + "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", + aesthetic, len, max_length + ))); + } + if len > max_length { + max_length = len; } } - // Step 2: Collect all aesthetic mappings and recycle scalars to max_length + // Step 3: Build VALUES clause and create final mappings simultaneously let mut columns: Vec> = Vec::new(); let mut column_names = Vec::new(); - for (aesthetic, value) in &layer.mappings.aesthetics { - if let crate::AestheticValue::Literal(param) = value { - let column_values = match param { - crate::plot::ParameterValue::Array(arr) => { - // Arrays: use as-is (already validated to be compatible) - arr.clone() - } - crate::plot::ParameterValue::Number(num) => { - // Scalar number: replicate to max_length - vec![ArrayElement::Number(*num); max_length] - } - crate::plot::ParameterValue::String(s) => { - // Scalar string: replicate to max_length - vec![ArrayElement::String(s.clone()); max_length] - } - crate::plot::ParameterValue::Boolean(b) => { - // Scalar boolean: replicate to max_length - vec![ArrayElement::Boolean(*b); max_length] - } - crate::plot::ParameterValue::Null => { - // NULL aesthetics are filtered in process_annotation_layers() - unreachable!("NULL is filtered before reaching annotation VALUES clause") - } - }; + for (aesthetic, param) in &annotation_params { + // Build column data for VALUES clause using rep() to handle scalars and arrays uniformly + let column_values = match param.clone().rep(max_length)? { + ParameterValue::Array(arr) => arr, + _ => unreachable!("rep() always returns Array variant"), + }; - columns.push(column_values); - column_names.push(naming::aesthetic_column(aesthetic)); - } + columns.push(column_values); + // Use raw aesthetic names (not prefixed) so annotations go through + // the same column→aesthetic renaming pipeline as regular layers + column_names.push(aesthetic.clone()); + + // Create final mapping directly (no intermediate Literal step) + let is_positional = crate::plot::aesthetic::is_positional_aesthetic(aesthetic); + let mapping_value = if is_positional { + // Positional aesthetics use Column (participate in scales) + AestheticValue::Column { + name: aesthetic.clone(), // Raw aesthetic name from VALUES clause + original_name: None, + is_dummy: false, + } + } else { + // Non-positional aesthetics use AnnotationColumn (identity scale) + AestheticValue::AnnotationColumn { + name: aesthetic.clone(), // Raw aesthetic name from VALUES clause + } + }; + + layer.mappings.insert(aesthetic.clone(), mapping_value); + // Remove from parameters now that it's in mappings + layer.parameters.remove(aesthetic); } - // Step 3: Build VALUES rows: (val1_row1, val2_row1), (val1_row2, val2_row2), ... + // Step 4: Build VALUES rows let mut rows = Vec::new(); for i in 0..max_length { let values: Vec = columns.iter().map(|col| col[i].to_sql()).collect(); rows.push(format!("({})", values.join(", "))); } - // Step 4: Build complete VALUES clause with column names + // Step 5: Build complete SQL query let values_clause = rows.join(", "); let column_list = column_names .iter() @@ -688,68 +734,76 @@ fn build_annotation_values_clause(layer: &Layer) -> Result { .collect::>() .join(", "); - // Return a SELECT statement wrapping the VALUES clause - // This matches the format of regular layer queries and allows proper wrapping by schema queries - Ok(format!( + let sql = format!( "SELECT * FROM (VALUES {}) AS t({})", values_clause, column_list - )) + ); + + Ok(sql) } #[cfg(test)] mod tests { use super::*; - use crate::plot::{AestheticValue, ArrayElement, DataSource, Geom, Layer, ParameterValue}; + use crate::plot::{ArrayElement, DataSource, Geom, Layer, ParameterValue}; #[test] - fn test_build_annotation_values_clause_single_scalar() { + fn test_annotation_single_scalar() { let mut layer = Layer::new(Geom::text()); layer.source = Some(DataSource::Annotation); + // Put values in parameters (not mappings) - process_annotation_layer will process them layer - .mappings - .insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); + .parameters + .insert("pos1".to_string(), ParameterValue::Number(5.0)); layer - .mappings - .insert("pos2", AestheticValue::Literal(ParameterValue::Number(10.0))); - layer.mappings.insert( - "label", - AestheticValue::Literal(ParameterValue::String("Test".to_string())), + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Test".to_string()), ); - let result = build_annotation_values_clause(&layer).unwrap(); + let result = process_annotation_layer(&mut layer).unwrap(); - // Should produce: SELECT * FROM (VALUES (...)) AS t("__ggsql_aes_pos1__", ...) + // Should produce: SELECT * FROM (VALUES (...)) AS t("pos1", "pos2", "label") + // Uses raw aesthetic names, not prefixed names assert!(result.contains("VALUES")); // Check all values are present (order may vary due to HashMap) assert!(result.contains("5")); assert!(result.contains("10")); assert!(result.contains("'Test'")); - assert!(result.contains("__ggsql_aes_pos1__")); - assert!(result.contains("__ggsql_aes_pos2__")); - assert!(result.contains("__ggsql_aes_label__")); + // Raw aesthetic names in column list + assert!(result.contains("\"pos1\"")); + assert!(result.contains("\"pos2\"")); + assert!(result.contains("\"label\"")); + + // After processing, mappings should have Column/AnnotationColumn values + assert!(layer.mappings.contains_key("pos1")); + assert!(layer.mappings.contains_key("pos2")); + assert!(layer.mappings.contains_key("label")); } #[test] - fn test_build_annotation_values_clause_array_recycling() { + fn test_annotation_array_recycling() { let mut layer = Layer::new(Geom::text()); layer.source = Some(DataSource::Annotation); - layer.mappings.insert( - "pos1", - AestheticValue::Literal(ParameterValue::Array(vec![ + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ ArrayElement::Number(1.0), ArrayElement::Number(2.0), ArrayElement::Number(3.0), - ])), + ]), ); layer - .mappings - .insert("pos2", AestheticValue::Literal(ParameterValue::Number(10.0))); - layer.mappings.insert( - "label", - AestheticValue::Literal(ParameterValue::String("Same".to_string())), + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Same".to_string()), ); - let result = build_annotation_values_clause(&layer).unwrap(); + let result = process_annotation_layer(&mut layer).unwrap(); // Should recycle scalar pos2 and label to match array length (3) assert!(result.contains("VALUES")); @@ -762,31 +816,31 @@ mod tests { } #[test] - fn test_build_annotation_values_clause_mismatched_arrays() { + fn test_annotation_mismatched_arrays() { let mut layer = Layer::new(Geom::text()); layer.source = Some(DataSource::Annotation); - layer.mappings.insert( - "pos1", - AestheticValue::Literal(ParameterValue::Array(vec![ + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ ArrayElement::Number(1.0), ArrayElement::Number(2.0), ArrayElement::Number(3.0), - ])), + ]), ); - layer.mappings.insert( - "pos2", - AestheticValue::Literal(ParameterValue::Array(vec![ - ArrayElement::Number(10.0), - ArrayElement::Number(20.0), - ])), + layer.parameters.insert( + "pos2".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(10.0), ArrayElement::Number(20.0)]), ); - let result = build_annotation_values_clause(&layer); + let result = process_annotation_layer(&mut layer); // Should error with mismatched lengths assert!(result.is_err()); let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("mismatched array lengths"), "Error message should mention mismatched arrays"); + assert!( + err_msg.contains("mismatched array lengths"), + "Error message should mention mismatched arrays" + ); // Error should mention one of the aesthetics (order may vary) assert!( err_msg.contains("pos1") || err_msg.contains("pos2"), @@ -795,25 +849,19 @@ mod tests { } #[test] - fn test_build_annotation_values_clause_multiple_arrays_same_length() { + fn test_annotation_multiple_arrays_same_length() { let mut layer = Layer::new(Geom::text()); layer.source = Some(DataSource::Annotation); - layer.mappings.insert( - "pos1", - AestheticValue::Literal(ParameterValue::Array(vec![ - ArrayElement::Number(1.0), - ArrayElement::Number(2.0), - ])), + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(1.0), ArrayElement::Number(2.0)]), ); - layer.mappings.insert( - "pos2", - AestheticValue::Literal(ParameterValue::Array(vec![ - ArrayElement::Number(10.0), - ArrayElement::Number(20.0), - ])), + layer.parameters.insert( + "pos2".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(10.0), ArrayElement::Number(20.0)]), ); - let result = build_annotation_values_clause(&layer).unwrap(); + let result = process_annotation_layer(&mut layer).unwrap(); // Both arrays have length 2, should work (order may vary) assert!(result.contains("VALUES")); @@ -823,27 +871,26 @@ mod tests { } #[test] - fn test_build_annotation_values_clause_mixed_types() { + fn test_annotation_mixed_types() { let mut layer = Layer::new(Geom::text()); layer.source = Some(DataSource::Annotation); layer - .mappings - .insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); - layer.mappings.insert( - "label", - AestheticValue::Literal(ParameterValue::String("Text".to_string())), - ); - layer.mappings.insert( - "visible", - AestheticValue::Literal(ParameterValue::Boolean(true)), + .parameters + .insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Text".to_string()), ); - let result = build_annotation_values_clause(&layer).unwrap(); + let result = process_annotation_layer(&mut layer).unwrap(); // Should handle different types (order may vary) assert!(result.contains("VALUES")); assert!(result.contains("5")); + assert!(result.contains("10")); assert!(result.contains("'Text'")); - assert!(result.contains("TRUE") || result.contains("true"), "Should contain boolean value"); } } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 5a173011..70f3c454 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -935,10 +935,10 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result = specs[0] .layers - .iter() + .iter_mut() .map(|l| layer::layer_source_query(l, &materialized_ctes, has_global_table)) .collect::>>()?; @@ -1231,7 +1231,6 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result [1.2, 2.5, 3.1, 2.8, 1.9, 2.2, 3.5, 2.1, 1.8, 2.9], bins => 5 + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + assert_eq!(result.specs.len(), 1); + assert_eq!( + result.specs[0].layers.len(), + 1, + "Should have one PLACE layer" + ); + + let histogram_layer = &result.specs[0].layers[0]; + assert_eq!(histogram_layer.geom, crate::Geom::histogram()); + assert!( + matches!(histogram_layer.source, Some(DataSource::Annotation)), + "PLACE layer should have Annotation source" ); + + // After stat transform, pos1 should be remapped to bin assert!( - matches!( - annotation_layer.mappings.get("size"), - Some(AestheticValue::Literal(ParameterValue::Number(n))) if *n == 14.0 - ), - "size should be in mappings as Literal" + histogram_layer.mappings.contains_key("pos1"), + "Histogram should have pos1 aesthetic (bin start)" + ); + assert!( + histogram_layer.mappings.contains_key("pos1end"), + "Histogram should have pos1end aesthetic (bin end)" + ); + assert!( + histogram_layer.mappings.contains_key("pos2"), + "Histogram should have pos2 aesthetic (count)" + ); + + // Verify the data has binned results + let histogram_key = histogram_layer.data_key.as_ref().unwrap(); + let histogram_df = result.data.get(histogram_key).unwrap(); + + assert!( + histogram_df.height() > 0, + "Histogram should produce binned data" + ); + assert!( + histogram_df.height() <= 5, + "Histogram with 5 bins should produce at most 5 rows" ); - // Parameters should be empty after resolve_aesthetics moves them to mappings + // Verify the binned data has the expected columns assert!( - annotation_layer.parameters.is_empty(), - "All SETTING parameters should be moved to mappings" + histogram_df.column("__ggsql_aes_pos1__").is_ok(), + "Should have bin start column" + ); + assert!( + histogram_df.column("__ggsql_aes_pos1end__").is_ok(), + "Should have bin end column" + ); + assert!( + histogram_df.column("__ggsql_aes_pos2__").is_ok(), + "Should have count column" ); } @@ -2377,7 +2443,6 @@ mod tests { } } - #[cfg(feature = "duckdb")] #[test] fn test_place_no_global_mapping_inheritance() { @@ -2417,4 +2482,3 @@ mod tests { ); } } - diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 1c7e9364..03c2fe12 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -529,7 +529,8 @@ pub fn find_schema_columns_for_aesthetic( for aes_name in &aesthetics_to_check { if let Some(value) = layer.mappings.get(aes_name) { match value { - AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { + AestheticValue::Column { name, .. } + | AestheticValue::AnnotationColumn { name } => { if let Some(info) = schema.iter().find(|c| c.name == *name) { infos.push(info.clone()); } diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 736bb7e2..0b4a8462 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -293,10 +293,9 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // since geom definitions use internal names for their supported/required aesthetics spec.transform_aesthetics_to_internal(); - // Process annotation layers: move positional/required parameters to mappings - // This must happen AFTER transform_aesthetics_to_internal() so parameter keys are in internal space - // Returns error if arrays have mismatched lengths - spec.process_annotation_layers()?; + // Note: Annotation layer processing (moving parameters to mappings) now happens + // during execution in process_annotation_layer(), not during parsing. + // This keeps all annotation-specific logic in one place. Ok(spec) } @@ -508,7 +507,7 @@ fn build_place_layer(node: &Node, source: &SourceTree) -> Result { let mut layer = build_layer(node, source)?; // Mark as annotation layer - // Array recycling happens later during SQL generation in build_annotation_values_clause() + // Array recycling happens later during SQL generation in process_annotation_layer() layer.source = Some(DataSource::Annotation); Ok(layer) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5e9af460..81e2fe16 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -444,22 +444,29 @@ mod tests { "PLACE layer should have Annotation source" ); - // Verify positional aesthetics moved to mappings with internal names - // (transform_aesthetics_to_internal runs as part of parse_query) - assert!( - specs[0].layers[1].mappings.contains_key("pos1"), - "x should be transformed to pos1 and moved to mappings" + // After parsing, annotation layer parameters stay in parameters + // They are only moved to mappings during execution (in process_annotation_layer) + // (transform_aesthetics_to_internal runs and transforms x→pos1, y→pos2) + assert_eq!( + specs[0].layers[1].parameters.get("pos1"), + Some(&ParameterValue::Number(5.0)), + "x should be transformed to pos1 but remain in parameters at parse time" ); - assert!( - specs[0].layers[1].mappings.contains_key("pos2"), - "y should be transformed to pos2 and moved to mappings" + assert_eq!( + specs[0].layers[1].parameters.get("pos2"), + Some(&ParameterValue::Number(10.0)), + "y should be transformed to pos2 but remain in parameters at parse time" ); - - // Verify non-positional parameter (label) stays in parameters (with internal name = same) assert_eq!( specs[0].layers[1].parameters.get("label"), Some(&ParameterValue::String("Hello".to_string())), - "Non-positional parameters remain in parameters" + "label (required) also remains in parameters at parse time" + ); + + // Mappings should be empty at parse time for annotation layers + assert!( + specs[0].layers[1].mappings.is_empty(), + "Annotation layer mappings should be empty at parse time (populated during execution)" ); } } diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index a185737e..a8cd50fc 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -17,7 +17,7 @@ impl GeomTrait for Text { defaults: &[ ("pos1", DefaultAestheticValue::Required), ("pos2", DefaultAestheticValue::Required), - ("label", DefaultAestheticValue::Null), + ("label", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::Null), ("size", DefaultAestheticValue::Number(11.0)), ("opacity", DefaultAestheticValue::Number(1.0)), diff --git a/src/plot/main.rs b/src/plot/main.rs index 2f6d0c55..1b4ec591 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -216,49 +216,7 @@ impl Plot { /// enabling them to participate in scale training and coordinate transformations. /// /// Note: Array recycling (replicating scalars to match array lengths) happens later - /// during SQL generation in `build_annotation_values_clause()`. - pub fn process_annotation_layers(&mut self) -> Result<(), crate::GgsqlError> { - for layer in &mut self.layers { - // Skip non-annotation layers - if !matches!(&layer.source, Some(DataSource::Annotation)) { - continue; - } - - // Move required/positional/array aesthetics to mappings - let required_aesthetics = layer.geom.aesthetics().required(); - let param_keys: Vec = layer.parameters.keys().cloned().collect(); - - for param_name in param_keys { - // Check if this is a positional aesthetic OR a required aesthetic OR an array - let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); - let is_required = required_aesthetics.contains(¶m_name.as_str()); - let is_array = matches!( - layer.parameters.get(¶m_name), - Some(ParameterValue::Array(_)) - ); - - if is_positional || is_required || is_array { - // Skip if already in mappings - if layer.mappings.contains_key(¶m_name) { - continue; - } - - // Move from parameters to mappings (no recycling - happens later in SQL generation) - if let Some(value) = layer.parameters.remove(¶m_name) { - // Filter out NULL aesthetics - they mean "use geom default" - if !value.is_null() { - layer - .mappings - .insert(¶m_name, AestheticValue::Literal(value)); - } - } - } - } - } - - Ok(()) - } - + /// during SQL generation in `process_annotation_layer()`. /// Check if the spec has any layers pub fn has_layers(&self) -> bool { !self.layers.is_empty() @@ -791,125 +749,4 @@ mod tests { "Non-color aesthetic should keep its name" ); } - - #[test] - fn test_process_annotation_layers_moves_positional_aesthetics() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::text()); - layer.source = Some(DataSource::Annotation); - layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); - layer.parameters.insert("pos2".to_string(), ParameterValue::Number(10.0)); - layer.parameters.insert("size".to_string(), ParameterValue::Number(14.0)); - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // Positional aesthetics should be moved to mappings - assert!(plot.layers[0].mappings.contains_key("pos1")); - assert!(plot.layers[0].mappings.contains_key("pos2")); - // Non-positional should stay in parameters - assert!(plot.layers[0].parameters.contains_key("size")); - assert!(!plot.layers[0].mappings.contains_key("size")); - } - - #[test] - fn test_process_annotation_layers_moves_required_aesthetics() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::text()); - layer.source = Some(DataSource::Annotation); - layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); - layer.parameters.insert("pos2".to_string(), ParameterValue::Number(10.0)); - layer.parameters.insert("label".to_string(), ParameterValue::String("Text".to_string())); - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // pos1 and pos2 are required and positional - should be moved to mappings - assert!(plot.layers[0].mappings.contains_key("pos1")); - assert!(plot.layers[0].mappings.contains_key("pos2")); - // label is optional (not required) - should stay in parameters - assert!(!plot.layers[0].mappings.contains_key("label")); - assert!(plot.layers[0].parameters.contains_key("label")); - } - - #[test] - fn test_process_annotation_layers_moves_array_parameters() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::text()); - layer.source = Some(DataSource::Annotation); - layer.parameters.insert("pos1".to_string(), ParameterValue::Array(vec![ - ArrayElement::Number(1.0), - ArrayElement::Number(2.0), - ])); - layer.parameters.insert("stroke".to_string(), ParameterValue::Array(vec![ - ArrayElement::String("red".to_string()), - ArrayElement::String("blue".to_string()), - ])); - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // Array parameters should be moved to mappings (even non-positional) - assert!(plot.layers[0].mappings.contains_key("pos1")); - assert!(plot.layers[0].mappings.contains_key("stroke")); - assert!(!plot.layers[0].parameters.contains_key("pos1")); - assert!(!plot.layers[0].parameters.contains_key("stroke")); - } - - #[test] - fn test_process_annotation_layers_filters_null_aesthetics() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::text()); - layer.source = Some(DataSource::Annotation); - layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); - layer.parameters.insert("pos2".to_string(), ParameterValue::Null); // NULL should be filtered - layer.parameters.insert("label".to_string(), ParameterValue::String("Text".to_string())); - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // pos1 should be moved to mappings (positional) - assert!(plot.layers[0].mappings.contains_key("pos1")); - // label should stay in parameters (optional, not positional) - assert!(!plot.layers[0].mappings.contains_key("label")); - assert!(plot.layers[0].parameters.contains_key("label")); - // NULL pos2 should not be in mappings (filtered out) - assert!(!plot.layers[0].mappings.contains_key("pos2")); - // NULL should also be removed from parameters - assert!(!plot.layers[0].parameters.contains_key("pos2")); - } - - #[test] - fn test_process_annotation_layers_skips_non_annotation_layers() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::point()); - layer.source = None; // Regular layer - layer.parameters.insert("pos1".to_string(), ParameterValue::Number(5.0)); - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // Parameters should not be moved for non-annotation layers - assert!(plot.layers[0].parameters.contains_key("pos1")); - assert!(!plot.layers[0].mappings.contains_key("pos1")); - } - - #[test] - fn test_process_annotation_layers_keeps_existing_mappings() { - let mut plot = Plot::new(); - let mut layer = Layer::new(Geom::text()); - layer.source = Some(DataSource::Annotation); - layer.mappings.insert("pos1", AestheticValue::Literal(ParameterValue::Number(5.0))); - layer.parameters.insert("pos1".to_string(), ParameterValue::Number(10.0)); // Should be ignored - plot.layers.push(layer); - - plot.process_annotation_layers().unwrap(); - - // Existing mapping should be preserved (not overwritten) - if let Some(AestheticValue::Literal(ParameterValue::Number(n))) = plot.layers[0].mappings.get("pos1") { - assert_eq!(*n, 5.0, "Should keep original mapping value"); - } else { - panic!("pos1 should be a literal number"); - } - } } diff --git a/src/plot/types.rs b/src/plot/types.rs index b3e82039..7db7bc92 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -187,9 +187,7 @@ pub enum AestheticValue { /// These columns are generated from user-specified literal values in visual space /// (e.g., color => 'red', size => 10) and use identity scales (no transformation). /// Positional annotations (x, y) use Column instead since they're in data coordinate space. - AnnotationColumn { - name: String, - }, + AnnotationColumn { name: String }, /// Literal value (quoted string, number, or boolean) Literal(ParameterValue), } @@ -227,9 +225,7 @@ impl AestheticValue { /// Create an annotation column mapping (synthesized from PLACE literals) pub fn annotation_column(name: impl Into) -> Self { - Self::AnnotationColumn { - name: name.into(), - } + Self::AnnotationColumn { name: name.into() } } /// Get column name if this is a column mapping diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index 52b53260..256d1f4d 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -802,10 +802,10 @@ pub(super) fn build_encoding_channel( name: col, original_name, is_dummy, - } => build_column_encoding(aesthetic, col, original_name, *is_dummy, false, ctx), + } => build_column_encoding(aesthetic, col, original_name, *is_dummy, true, ctx), AestheticValue::AnnotationColumn { name: col } => { // Non-positional annotation columns use identity scale - build_column_encoding(aesthetic, col, &None, false, true, ctx) + build_column_encoding(aesthetic, col, &None, false, false, ctx) } AestheticValue::Literal(lit) => build_literal_encoding(aesthetic, lit), } From 86549a211ccb7733631fa7c8c481d118be6be69c Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 9 Mar 2026 17:22:58 +0100 Subject: [PATCH 17/26] bit of cleanup --- src/plot/layer/mod.rs | 16 +++------------- src/plot/main.rs | 16 ++-------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index d2bda03d..acf8ccc6 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -239,9 +239,6 @@ impl Layer { /// happens in Polars after query execution, before the data goes to the writer. pub fn update_mappings_for_aesthetic_columns(&mut self) { use crate::naming; - use crate::plot::aesthetic::is_positional_aesthetic; - - let is_annotation = matches!(self.source, Some(crate::DataSource::Annotation)); for (aesthetic, value) in self.mappings.aesthetics.iter_mut() { let aes_col_name = naming::aesthetic_column(aesthetic); @@ -263,16 +260,9 @@ impl Layer { *name = aes_col_name; } AestheticValue::Literal(_) => { - // Literals become columns with prefixed aesthetic name - // For annotation layers: - // - Positional aesthetics (x, y): use Column (data coordinate space, participate in scales) - // - Non-positional aesthetics (color, size): use AnnotationColumn (visual space, identity scale) - let is_positional = is_positional_aesthetic(aesthetic); - *value = if is_annotation && !is_positional { - AestheticValue::annotation_column(aes_col_name) - } else { - AestheticValue::standard_column(aes_col_name) - }; + // Literals become standard columns with prefixed aesthetic name + // Note: literals don't have an original_name to preserve + *value = AestheticValue::standard_column(aes_col_name); } } } diff --git a/src/plot/main.rs b/src/plot/main.rs index 1b4ec591..d4d7a7c9 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -205,18 +205,6 @@ impl Plot { } } - /// Process annotation layers by moving positional and required aesthetics - /// from parameters to mappings. - /// - /// This must be called AFTER transform_aesthetics_to_internal() so that - /// parameter keys are already in internal space (pos1, pos2, etc.). - /// - /// Positional aesthetics need to be in mappings so they go through the same - /// transformation pipeline (internal→user space) as data-mapped aesthetics, - /// enabling them to participate in scale training and coordinate transformations. - /// - /// Note: Array recycling (replicating scalars to match array lengths) happens later - /// during SQL generation in `process_annotation_layer()`. /// Check if the spec has any layers pub fn has_layers(&self) -> bool { !self.layers.is_empty() @@ -517,11 +505,11 @@ mod tests { assert!(bar.is_supported("pos1")); // Bar accepts optional pos1 assert_eq!(bar.required(), &[] as &[&str]); // No required aesthetics - // Text geom + // Text geom - requires label let text = Geom::text().aesthetics(); assert!(text.is_supported("label")); assert!(text.is_supported("family")); - assert_eq!(text.required(), &["pos1", "pos2"]); + assert_eq!(text.required(), &["pos1", "pos2", "label"]); // Statistical geoms only require pos1 assert_eq!(Geom::histogram().aesthetics().required(), &["pos1"]); From d22fe4716767269782e4a506b4dcdf09f6bf476f Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 16:13:36 +0100 Subject: [PATCH 18/26] soothe clippy --- src/writer/vegalite/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 44f7ffbe..7e5ba9b3 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -283,6 +283,7 @@ fn build_layer_encoding( let secondary_encoding = match value { AestheticValue::Column { name: col, .. } => json!({"field": col}), AestheticValue::Literal(lit) => json!({"value": lit.to_json()}), + AestheticValue::AnnotationColumn { name: col } => json!({"field": col}), }; encoding.insert(channel_name, secondary_encoding); continue; From 70ce8cac704e841a7586f31387aad0d85e33a5cc Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 16:31:05 +0100 Subject: [PATCH 19/26] Document PLACE clause in CLAUDE.md Add terse documentation for PLACE annotation layers and update type definitions to reflect current implementation (Position, DataSource, AnnotationColumn). Co-Authored-By: Claude Sonnet 4.5 --- CLAUDE.md | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index b1a43363..e40316e8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -322,8 +322,10 @@ pub enum GlobalMappingItem { pub struct Layer { pub geom: Geom, // Geometric object type + pub position: Position, // Position adjustment (from SETTING position) pub aesthetics: HashMap, // Aesthetic mappings (from MAPPING) pub parameters: HashMap, // Geom parameters (from SETTING) + pub source: Option, // Data source (from MAPPING ... FROM or PLACE) pub filter: Option, // Layer filter (from FILTER) pub partition_by: Vec, // Grouping columns (from PARTITION BY) } @@ -337,8 +339,22 @@ pub enum Geom { Text, Label, Segment, Arrow, Rule, Linear, ErrorBar, } +pub enum DataSource { + Identifier(String), // Table/CTE name + FilePath(String), // File path (quoted) + Annotation, // PLACE clause (no external data) +} + +pub enum Position { + Identity, // No adjustment + Stack, // Stack elements (bars, areas) + Dodge, // Dodge side-by-side (bars) + Jitter, // Jitter points randomly +} + pub enum AestheticValue { - Column(String), // Unquoted column reference: revenue AS x + Column(String), // Column from data: revenue AS x + AnnotationColumn(String), // Annotation literal (PLACE): uses identity scale Literal(ParameterValue), // Quoted literal: 'value' AS fill } @@ -1178,6 +1194,7 @@ Where `` can be: | ----------- | ---------- | ----------------- | ----------------------------------------- | | `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | | `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `PLACE` | ✅ Yes | Annotation layers | `PLACE point SETTING x => 5, y => 10` | | `SCALE` | ✅ Yes | Configure scales | `SCALE x VIA date` | | `FACET` | ❌ No | Small multiples | `FACET region` | | `PROJECT` | ❌ No | Coordinate system | `PROJECT TO cartesian` | @@ -1289,6 +1306,29 @@ DRAW line FILTER year >= 2020 ``` +### PLACE Clause (Annotation Layers) + +**Syntax**: + +```sql +PLACE SETTING => , ... +``` + +Creates annotation layers with literal values only (no data mappings). All aesthetics set via SETTING; supports arrays that are recycled to longest length. No FILTER/PARTITION BY/ORDER BY support. + +**Examples**: + +```sql +-- Single annotation +PLACE point SETTING x => 5, y => 10, color => 'red' + +-- Multiple annotations (array recycling) +PLACE point SETTING x => [1, 2, 3], y => [10, 20, 30] + +-- Reference line +PLACE rule SETTING x => 5, color => 'red' +``` + ### SCALE Clause **Syntax**: From 8510e6ba40865087095234b08262e7889b73622a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 17:19:50 +0100 Subject: [PATCH 20/26] use place in examples --- doc/syntax/layer/type/linear.qmd | 4 ++-- doc/syntax/layer/type/rule.qmd | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/syntax/layer/type/linear.qmd b/doc/syntax/layer/type/linear.qmd index ceeb93ac..92f60459 100644 --- a/doc/syntax/layer/type/linear.qmd +++ b/doc/syntax/layer/type/linear.qmd @@ -46,10 +46,10 @@ Add a simple reference line to a scatterplot: ```{ggsql} VISUALISE FROM ggsql:penguins DRAW point MAPPING bill_len AS x, bill_dep AS y - DRAW linear MAPPING 0.4 AS coef, -1 AS intercept + PLACE linear SETTING coef => 0.4, intercept => -1 ``` -Add multiple reference lines with different colors from a separate dataset: +Add multiple reference lines with different colors from a separate dataset. Note we're mapping from data here, so we use `DRAW` instead of `PLACE`. ```{ggsql} WITH lines AS ( diff --git a/doc/syntax/layer/type/rule.qmd b/doc/syntax/layer/type/rule.qmd index ac3a0158..dbfacd72 100644 --- a/doc/syntax/layer/type/rule.qmd +++ b/doc/syntax/layer/type/rule.qmd @@ -40,7 +40,7 @@ WHERE Month = 5 VISUALISE DRAW line MAPPING date AS x, temperature AS y - DRAW rule MAPPING 70 AS y + PLACE rule SETTING y => 70 ``` Add a vertical line to mark a specific value: @@ -48,10 +48,10 @@ Add a vertical line to mark a specific value: ```{ggsql} VISUALISE FROM ggsql:penguins DRAW point MAPPING bill_len AS x, bill_dep AS y - DRAW rule MAPPING 45 AS x + PLACE rule SETTING x => 45 ``` -Add multiple threshold lines with different colors: +Add multiple threshold lines with different colors. Note that because we're mapping data from data, we use the `DRAW` clause instead of the `PLACE` clause. ```{ggsql} WITH thresholds AS ( From 81774c0414cbee3b5179229f5e28d1f5f48441f9 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 18:01:16 +0100 Subject: [PATCH 21/26] Add PLACE clause syntax highlighting to VS Code and Quarto The PLACE clause was added to the grammar but was missing from the syntax highlighting definitions. This adds complete highlighting support for PLACE clauses in both VS Code (TextMate grammar) and Quarto documentation (KDE XML). Changes: - VS Code: Add place-clause pattern with geom type and SETTING support - Quarto: Add PlaceClause context with proper clause transitions - Both: Update all clause boundaries to recognize PLACE Co-Authored-By: Claude Sonnet 4.5 --- doc/ggsql.xml | 49 +++++++++++++++++++++ ggsql-vscode/syntaxes/ggsql.tmLanguage.json | 29 +++++++++--- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 2cd20e9a..74c8b8f2 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -90,6 +90,7 @@ DRAW + PLACE SCALE PROJECT FACET @@ -404,6 +405,7 @@ + @@ -451,6 +453,7 @@ + @@ -481,6 +484,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -491,6 +534,7 @@ + @@ -537,6 +581,7 @@ + @@ -580,6 +625,7 @@ + @@ -622,6 +668,7 @@ + @@ -659,6 +706,7 @@ + @@ -699,6 +747,7 @@ + diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index 40d5306c..ecd49865 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -8,6 +8,7 @@ { "include": "#numbers" }, { "include": "#sql-functions" }, { "include": "#draw-clause" }, + { "include": "#place-clause" }, { "include": "#scale-clause" }, { "include": "#facet-clause" }, { "include": "#project-clause" }, @@ -217,7 +218,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "include": "#comments" }, { "include": "#strings" }, @@ -290,7 +291,21 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "patterns": [ + { + "name": "support.type.geom.ggsql", + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|linear|errorbar)\\b" + }, + { "include": "#common-clause-patterns" } + ] + }, + "place-clause": { + "begin": "(?i)\\b(PLACE)\\b", + "beginCaptures": { + "1": { "name": "keyword.other.ggsql" } + }, + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.geom.ggsql", @@ -304,7 +319,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "keyword.control.scale-modifier.ggsql", @@ -330,7 +345,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.property.ggsql", @@ -352,7 +367,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.project.ggsql", @@ -370,7 +385,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.property.ggsql", @@ -384,7 +399,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.theme.ggsql", From 7e99f9d5b6810631ad7b60db480bf7019cc545be Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 22:40:18 +0100 Subject: [PATCH 22/26] try to cast literals as date --- src/execute/layer.rs | 110 +++++++++++++++++++++++++++++++++++++++++-- src/plot/types.rs | 78 +++++++++++++++++++++++++++++- 2 files changed, 184 insertions(+), 4 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 8c68fd6e..1c67dce3 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -192,12 +192,35 @@ pub fn apply_remappings_post_query(df: DataFrame, layer: &Layer) -> Result polars::prelude::Series { - use polars::prelude::{NamedFrom, Series}; + use crate::plot::ArrayElement; + use polars::prelude::{DataType, NamedFrom, Series, TimeUnit}; match lit { ParameterValue::Number(n) => Series::new(name.into(), vec![*n; len]), - ParameterValue::String(s) => Series::new(name.into(), vec![s.as_str(); len]), + ParameterValue::String(s) => { + // Try to parse as temporal types (DateTime > Date > Time) + match ArrayElement::String(s.clone()).try_as_temporal() { + ArrayElement::DateTime(micros) => Series::new(name.into(), vec![micros; len]) + .cast(&DataType::Datetime(TimeUnit::Microseconds, None)) + .expect("DateTime cast should not fail"), + ArrayElement::Date(days) => Series::new(name.into(), vec![days; len]) + .cast(&DataType::Date) + .expect("Date cast should not fail"), + ArrayElement::Time(nanos) => Series::new(name.into(), vec![nanos; len]) + .cast(&DataType::Time) + .expect("Time cast should not fail"), + ArrayElement::String(_) => { + // Parsing failed, use original string + Series::new(name.into(), vec![s.as_str(); len]) + } + _ => unreachable!("try_as_temporal only returns String or temporal types"), + } + } ParameterValue::Boolean(b) => Series::new(name.into(), vec![*b; len]), ParameterValue::Array(_) | ParameterValue::Null => { unreachable!("Arrays are never moved to mappings; NULL is filtered in process_annotation_layers()") @@ -726,11 +749,18 @@ fn process_annotation_layer(layer: &mut Layer) -> Result { for (aesthetic, param) in &annotation_params { // Build column data for VALUES clause using rep() to handle scalars and arrays uniformly - let column_values = match param.clone().rep(max_length)? { + let mut column_values = match param.clone().rep(max_length)? { ParameterValue::Array(arr) => arr, _ => unreachable!("rep() always returns Array variant"), }; + // Try to parse string elements as temporal types (Date/DateTime/Time) + // This ensures literals like '1973-06-01' become Date columns, not String columns + column_values = column_values + .into_iter() + .map(|elem| elem.try_as_temporal()) + .collect(); + columns.push(column_values); // Use raw aesthetic names (not prefixed) so annotations go through // the same column→aesthetic renaming pipeline as regular layers @@ -1002,4 +1032,78 @@ mod tests { assert!(result.contains("10")); assert!(result.contains("'Text'")); } + + #[test] + fn test_literal_to_series_date_parsing() { + use polars::prelude::DataType; + + // Date literal should parse to Date type + let series = literal_to_series( + "date_col", + &ParameterValue::String("1973-06-01".to_string()), + 5, + ); + assert_eq!( + series.dtype(), + &DataType::Date, + "Date string should parse to Date type" + ); + assert_eq!(series.len(), 5); + } + + #[test] + fn test_literal_to_series_datetime_parsing() { + use polars::prelude::{DataType, TimeUnit}; + + // DateTime literal should parse to Datetime type + let series = literal_to_series( + "dt_col", + &ParameterValue::String("2024-03-17T14:30:00".to_string()), + 3, + ); + assert!( + matches!( + series.dtype(), + DataType::Datetime(TimeUnit::Microseconds, None) + ), + "DateTime string should parse to Datetime type" + ); + assert_eq!(series.len(), 3); + } + + #[test] + fn test_literal_to_series_time_parsing() { + use polars::prelude::DataType; + + // Time literal should parse to Time type + let series = literal_to_series( + "time_col", + &ParameterValue::String("14:30:00".to_string()), + 4, + ); + assert_eq!( + series.dtype(), + &DataType::Time, + "Time string should parse to Time type" + ); + assert_eq!(series.len(), 4); + } + + #[test] + fn test_literal_to_series_string_fallback() { + use polars::prelude::DataType; + + // Non-temporal string should remain String type + let series = literal_to_series( + "text_col", + &ParameterValue::String("not a date".to_string()), + 2, + ); + assert_eq!( + series.dtype(), + &DataType::String, + "Non-temporal string should remain String type" + ); + assert_eq!(series.len(), 2); + } } diff --git a/src/plot/types.rs b/src/plot/types.rs index fedb727c..ec28b525 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -683,7 +683,8 @@ impl ArrayElement { } Self::Date(d) => { // Convert days since epoch to DATE - format!("DATE '1970-01-01' + INTERVAL {} DAY", d) + // Use CAST to ensure result is DATE type, not TIMESTAMP + format!("CAST(DATE '1970-01-01' + INTERVAL {} DAY AS DATE)", d) } Self::DateTime(dt) => { // Convert microseconds since epoch to TIMESTAMP @@ -791,6 +792,39 @@ impl ArrayElement { None } + /// Try to parse string as temporal type (Date/DateTime/Time). + /// + /// If this is a String variant, attempts to parse it as a temporal type + /// in order of specificity: DateTime > Date > Time. + /// If parsing succeeds, returns the temporal variant. Otherwise returns self unchanged. + /// + /// For non-String variants, returns self unchanged. + /// + /// # Example + /// ```ignore + /// let elem = ArrayElement::String("1973-06-01".to_string()); + /// let parsed = elem.try_as_temporal(); + /// // parsed is now ArrayElement::Date(...) + /// ``` + pub fn try_as_temporal(self) -> Self { + if let Self::String(ref s) = self { + // Try DateTime first (most specific) + if let Some(dt) = Self::from_datetime_string(s) { + return dt; + } + // Try Date + if let Some(d) = Self::from_date_string(s) { + return d; + } + // Try Time + if let Some(t) = Self::from_time_string(s) { + return t; + } + } + // Fall back to original value if not a string or parsing failed + self + } + /// Convert to string for HashMap keys and display pub fn to_key_string(&self) -> String { match self { @@ -1514,4 +1548,46 @@ mod tests { assert_eq!(s, "foo"); } } + + #[test] + fn test_try_as_temporal_date() { + let elem = ArrayElement::String("1973-06-01".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Date(_))); + assert_eq!(parsed.to_key_string(), "1973-06-01"); + } + + #[test] + fn test_try_as_temporal_datetime() { + let elem = ArrayElement::String("2024-03-17T14:30:00".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::DateTime(_))); + } + + #[test] + fn test_try_as_temporal_time() { + let elem = ArrayElement::String("14:30:00".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Time(_))); + } + + #[test] + fn test_try_as_temporal_non_temporal_string() { + let elem = ArrayElement::String("not a date".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::String(_))); + assert_eq!(parsed.to_key_string(), "not a date"); + } + + #[test] + fn test_try_as_temporal_non_string() { + // Non-string elements should pass through unchanged + let elem = ArrayElement::Number(42.0); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Number(_))); + + let elem = ArrayElement::Boolean(true); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Boolean(_))); + } } From 586df16aa6b66dfa3542be5536454cab2c9d8cd6 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 17 Mar 2026 22:51:22 +0100 Subject: [PATCH 23/26] add some more examples --- doc/syntax/layer/type/rect.qmd | 16 +++++++--------- doc/syntax/layer/type/text.qmd | 10 ++++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/syntax/layer/type/rect.qmd b/doc/syntax/layer/type/rect.qmd index 2c1ea360..79134a45 100644 --- a/doc/syntax/layer/type/rect.qmd +++ b/doc/syntax/layer/type/rect.qmd @@ -92,17 +92,15 @@ VISUALISE start AS xmin, end AS xmax, min AS ymin, max AS ymax DRAW rect ``` -Using a rectangle as an annotation. - - +Using a rectangle as an annotation. Note we're using the `PLACE` clause here instead of `DRAW` because we're not mapping from data. ```{ggsql} VISUALISE FROM ggsql:airquality - DRAW rect MAPPING - '1973-06-01' AS xmin, - '1973-06-30' AS xmax, - 50 AS ymin, - 100 AS ymax, - 'June' AS colour + PLACE rect SETTING + xmin => '1973-06-01', + xmax => '1973-06-30', + ymin => 50, + ymax => 100, + colour => 'dodgerblue' DRAW line MAPPING Date AS x, Temp AS y ``` \ No newline at end of file diff --git a/doc/syntax/layer/type/text.qmd b/doc/syntax/layer/type/text.qmd index a84bc2df..0e601c5b 100644 --- a/doc/syntax/layer/type/text.qmd +++ b/doc/syntax/layer/type/text.qmd @@ -137,3 +137,13 @@ VISUALISE island AS x, n AS y SCALE y FROM [0, 200] ``` +You can use `PLACE` to annotate a plot directly without needing to map data. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins + DRAW point MAPPING species AS colour + PLACE text SETTING + label => ['Adelie', 'Chinstrap', 'Gentoo'], + x => [40, 50, 50], + y => [19, 19, 15] +``` \ No newline at end of file From d193ce1307f841579369d7881724059f4dc02edc Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 18 Mar 2026 10:04:23 +0100 Subject: [PATCH 24/26] Fix failing PLACE annotation layer tests Three tests were failing due to incorrect aesthetics and expectations: 1. Text geom uses 'fontsize' aesthetic, not 'size' - updated test queries 2. Test incorrectly expected no fill on annotation layers - annotation layers correctly inherit geom defaults (fill='black' for text), they just don't inherit global mappings from VISUALISE clause 3. Simplified test_end_to_end_place_field_vs_value_encoding to use point instead of text geom, avoiding complex font-based Vega-Lite nesting All 1215 library tests now pass. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/mod.rs | 22 ++++++++++++++++++---- src/lib.rs | 22 ++++++++++------------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index b0e5f7b2..caf6449a 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2341,7 +2341,7 @@ mod tests { SELECT * FROM test_place VISUALISE x, y DRAW point - PLACE text SETTING x => 2, y => 25, label => 'Annotation', size => 14 + PLACE text SETTING x => 2, y => 25, label => 'Annotation', fontsize => 14 "#; let result = prepare_data_with_reader(query, &reader).unwrap(); @@ -2592,13 +2592,27 @@ mod tests { !annotation_layer.mappings.contains_key("color"), "PLACE layer should not inherit color from global mappings" ); + + // PLACE layer should have geom default fill='black', not global color='red' assert!( - !annotation_layer.mappings.contains_key("fill"), - "PLACE layer should not inherit fill from global mappings" + annotation_layer.mappings.contains_key("fill"), + "PLACE layer should have default fill from text geom" ); + match annotation_layer.mappings.aesthetics.get("fill") { + Some(AestheticValue::Literal(crate::plot::types::ParameterValue::String(s))) if s == "black" => { + // Correct: geom default fill + } + Some(AestheticValue::Column { name, .. }) if name == "color" => { + panic!("PLACE layer incorrectly inherited global color mapping as fill"); + } + other => { + panic!("Expected fill=Literal('black'), got: {:?}", other); + } + } + assert!( !annotation_layer.mappings.contains_key("stroke"), - "PLACE layer should not inherit stroke from global mappings" + "PLACE layer should not have stroke (text geom default is null)" ); } #[cfg(feature = "duckdb")] diff --git a/src/lib.rs b/src/lib.rs index 3e1e375e..992f25ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -769,8 +769,8 @@ mod integration_tests { let query = r#" SELECT 1 AS x, 10 AS y UNION ALL SELECT 2 AS x, 20 AS y VISUALISE x, y - DRAW point - PLACE text SETTING x => 5, y => 30, label => 'Annotation', size => 16, stroke => 'red' + DRAW line + PLACE point SETTING x => 5, y => 30, size => 100, stroke => 'red' "#; let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); @@ -780,18 +780,18 @@ mod integration_tests { let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); - // Find the text layer (should be second layer) - let text_layer = &vl_spec["layer"][1]; + // Find the point annotation layer (should be second layer) + let point_layer = &vl_spec["layer"][1]; // Mark can be either a string or an object with type - let mark_type = if text_layer["mark"].is_string() { - text_layer["mark"].as_str().unwrap() + let mark_type = if point_layer["mark"].is_string() { + point_layer["mark"].as_str().unwrap() } else { - text_layer["mark"]["type"].as_str().unwrap() + point_layer["mark"]["type"].as_str().unwrap() }; - assert_eq!(mark_type, "text", "Second layer should be text"); + assert_eq!(mark_type, "point", "Second layer should be point annotation"); - let encoding = &text_layer["encoding"]; + let encoding = &point_layer["encoding"]; // Positional aesthetics should be field encodings (have "field" key) assert!( @@ -806,7 +806,6 @@ mod integration_tests { ); // Non-positional aesthetics should be value encodings (have "value" key) - // Note: size may be scaled/transformed, so just check it's present as a value assert!( encoding["size"]["value"].is_number(), "size should be a value encoding with numeric value: {:?}", @@ -815,12 +814,11 @@ mod integration_tests { // Note: stroke color goes through resolve_aesthetics and may be in different location // Just verify it's present somewhere as a literal - let text_mark = &text_layer["mark"]; let has_stroke_value = encoding .get("stroke") .and_then(|s| s.get("value")) .is_some() - || text_mark.get("stroke").is_some(); + || point_layer["mark"].get("stroke").is_some(); assert!(has_stroke_value, "stroke should be present as a value"); } From a263e625270c1d22ada6c4961ceec4ce93efaac0 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 18 Mar 2026 10:05:01 +0100 Subject: [PATCH 25/26] cargo fmt --- src/execute/mod.rs | 4 +++- src/lib.rs | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index caf6449a..c8c7475f 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2599,7 +2599,9 @@ mod tests { "PLACE layer should have default fill from text geom" ); match annotation_layer.mappings.aesthetics.get("fill") { - Some(AestheticValue::Literal(crate::plot::types::ParameterValue::String(s))) if s == "black" => { + Some(AestheticValue::Literal(crate::plot::types::ParameterValue::String(s))) + if s == "black" => + { // Correct: geom default fill } Some(AestheticValue::Column { name, .. }) if name == "color" => { diff --git a/src/lib.rs b/src/lib.rs index 992f25ce..62761387 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -789,7 +789,10 @@ mod integration_tests { } else { point_layer["mark"]["type"].as_str().unwrap() }; - assert_eq!(mark_type, "point", "Second layer should be point annotation"); + assert_eq!( + mark_type, "point", + "Second layer should be point annotation" + ); let encoding = &point_layer["encoding"]; From 7ed076c339cb944c4d27c4185a7c59fa18b608ef Mon Sep 17 00:00:00 2001 From: Teun van den Brand <49372158+teunbrand@users.noreply.github.com> Date: Wed, 18 Mar 2026 11:37:23 +0100 Subject: [PATCH 26/26] Apply suggestions from code review Co-authored-by: Thomas Lin Pedersen --- doc/syntax/clause/place.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/syntax/clause/place.qmd b/doc/syntax/clause/place.qmd index 6321bbed..8c37f32e 100644 --- a/doc/syntax/clause/place.qmd +++ b/doc/syntax/clause/place.qmd @@ -2,7 +2,7 @@ title: "Create annotation layers with `PLACE`" --- -The `PLACE` clause is the little sibling of the [`DRAW` clause](../clause/draw.qmd) that also creates layers. +The `PLACE` clause is the little sibling of the [`DRAW` clause](draw.qmd) that also creates layers. A layer created with `PLACE` has no mappings to data, all aesthetics are set using literal values instead. ## Clause syntax @@ -25,7 +25,7 @@ everything is declared inline. SETTING => , ... ``` -The `SETTING` clause can be used for to different things: +The `SETTING` clause can be used for two different things: * *Setting aesthetics*: All aesthetics in `PLACE` layers are specified using literal value, e.g. 'red' (as in the color red). Aesthetics that are set will not go through a scale but will use the provided value as-is.