diff --git a/CLAUDE.md b/CLAUDE.md index 97bd8ee5..d408cc55 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -322,8 +322,10 @@ pub enum GlobalMappingItem { pub struct Layer { pub geom: Geom, // Geometric object type + pub position: Position, // Position adjustment (from SETTING position) pub aesthetics: HashMap, // Aesthetic mappings (from MAPPING) pub parameters: HashMap, // Geom parameters (from SETTING) + pub source: Option, // Data source (from MAPPING ... FROM or PLACE) pub filter: Option, // Layer filter (from FILTER) pub partition_by: Vec, // Grouping columns (from PARTITION BY) } @@ -337,8 +339,22 @@ pub enum Geom { Text, Segment, Arrow, Rule, Linear, ErrorBar, } +pub enum DataSource { + Identifier(String), // Table/CTE name + FilePath(String), // File path (quoted) + Annotation, // PLACE clause (no external data) +} + +pub enum Position { + Identity, // No adjustment + Stack, // Stack elements (bars, areas) + Dodge, // Dodge side-by-side (bars) + Jitter, // Jitter points randomly +} + pub enum AestheticValue { - Column(String), // Unquoted column reference: revenue AS x + Column(String), // Column from data: revenue AS x + AnnotationColumn(String), // Annotation literal (PLACE): uses identity scale Literal(ParameterValue), // Quoted literal: 'value' AS fill } @@ -1178,6 +1194,7 @@ Where `` can be: | ----------- | ---------- | ----------------- | ----------------------------------------- | | `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | | `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `PLACE` | ✅ Yes | Annotation layers | `PLACE point SETTING x => 5, y => 10` | | `SCALE` | ✅ Yes | Configure scales | `SCALE x VIA date` | | `FACET` | ❌ No | Small multiples | `FACET region` | | `PROJECT` | ❌ No | Coordinate system | `PROJECT TO cartesian` | @@ -1289,6 +1306,29 @@ DRAW line FILTER year >= 2020 ``` +### PLACE Clause (Annotation Layers) + +**Syntax**: + +```sql +PLACE SETTING => , ... 
+``` + +Creates annotation layers with literal values only (no data mappings). All aesthetics set via SETTING; supports arrays that are recycled to longest length. No FILTER/PARTITION BY/ORDER BY support. + +**Examples**: + +```sql +-- Single annotation +PLACE point SETTING x => 5, y => 10, color => 'red' + +-- Multiple annotations (array recycling) +PLACE point SETTING x => [1, 2, 3], y => [10, 20, 30] + +-- Reference line +PLACE rule SETTING x => 5, color => 'red' +``` + ### SCALE Clause **Syntax**: diff --git a/doc/_quarto.yml b/doc/_quarto.yml index 59d62765..5e464b54 100644 --- a/doc/_quarto.yml +++ b/doc/_quarto.yml @@ -40,6 +40,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" @@ -77,6 +79,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 2cd20e9a..74c8b8f2 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -90,6 +90,7 @@ DRAW + PLACE SCALE PROJECT FACET @@ -404,6 +405,7 @@ + @@ -451,6 +453,7 @@ + @@ -481,6 +484,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -491,6 +534,7 @@ + @@ -537,6 +581,7 @@ + @@ -580,6 +625,7 @@ + @@ -622,6 +668,7 @@ + @@ -659,6 +706,7 @@ + @@ -699,6 +747,7 @@ + diff --git a/doc/syntax/clause/place.qmd b/doc/syntax/clause/place.qmd new file mode 100644 index 00000000..8c37f32e --- /dev/null +++ b/doc/syntax/clause/place.qmd @@ -0,0 +1,37 @@ +--- +title: "Create annotation layers with `PLACE`" +--- + +The `PLACE` clause is the little sibling of the [`DRAW` clause](draw.qmd) that also creates layers. +A layer created with `PLACE` has no mappings to data, all aesthetics are set using literal values instead. 
+ +## Clause syntax +The `PLACE` clause takes just a type and a setting clause, all of them required. + +```ggsql +PLACE <type> + SETTING <aesthetic> => <value>, ... +``` + +Like `DRAW`, the layer type is required and specifies the type of layer to draw, like `point` or `text`. +It defines how the remaining settings are interpreted. +The [main syntax page](../index.qmd#layers) has a list of all available layer types. + +Unlike the `DRAW` clause, the `PLACE` clause does not support `FILTER`, `PARTITION BY`, and `ORDER BY` clauses since +everything is declared inline. + +### `SETTING` +```ggsql +SETTING <aesthetic> => <value>, ... +``` + +The `SETTING` clause can be used for two different things: + +* *Setting aesthetics*: All aesthetics in `PLACE` layers are specified using literal values, e.g. 'red' (as in the color red). +Aesthetics that are set will not go through a scale but will use the provided value as-is. +You cannot set an aesthetic to a column, only to literal values. +Contrary to `DRAW` layers, `PLACE` layers can take multiple literal values in an array. +* *Setting parameters*: Some layers take additional arguments that control how they behave. +Often, but not always, these modify the statistical transformation in some way. +An example would be the binwidth parameter in histogram which controls the width of each bin during histogram calculation. +This is not a statistical property since it is not related to each record, but to the calculation as a whole. 
diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index cdf77e07..42cc62e7 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -7,6 +7,7 @@ ggsql augments the standard SQL syntax with a number of new clauses to describe - [`VISUALISE`](clause/visualise.qmd) initiates the visualisation part of the query - [`DRAW`](clause/draw.qmd) adds a new layer to the visualisation + - [`PLACE`](clause/place.qmd) adds an annotation layer - [`SCALE`](clause/scale.qmd) specify how an aesthetic should be scaled - [`FACET`](clause/facet.qmd) describes how data should be split into small multiples - [`PROJECT`](clause/project.qmd) is used for selecting the coordinate system to use diff --git a/doc/syntax/layer/type/linear.qmd b/doc/syntax/layer/type/linear.qmd index ceeb93ac..92f60459 100644 --- a/doc/syntax/layer/type/linear.qmd +++ b/doc/syntax/layer/type/linear.qmd @@ -46,10 +46,10 @@ Add a simple reference line to a scatterplot: ```{ggsql} VISUALISE FROM ggsql:penguins DRAW point MAPPING bill_len AS x, bill_dep AS y - DRAW linear MAPPING 0.4 AS coef, -1 AS intercept + PLACE linear SETTING coef => 0.4, intercept => -1 ``` -Add multiple reference lines with different colors from a separate dataset: +Add multiple reference lines with different colors from a separate dataset. Note we're mapping from data here, so we use `DRAW` instead of `PLACE`. ```{ggsql} WITH lines AS ( diff --git a/doc/syntax/layer/type/rect.qmd b/doc/syntax/layer/type/rect.qmd index 2c1ea360..79134a45 100644 --- a/doc/syntax/layer/type/rect.qmd +++ b/doc/syntax/layer/type/rect.qmd @@ -92,17 +92,15 @@ VISUALISE start AS xmin, end AS xmax, min AS ymin, max AS ymax DRAW rect ``` -Using a rectangle as an annotation. - - +Using a rectangle as an annotation. Note we're using the `PLACE` clause here instead of `DRAW` because we're not mapping from data. 
```{ggsql} VISUALISE FROM ggsql:airquality - DRAW rect MAPPING - '1973-06-01' AS xmin, - '1973-06-30' AS xmax, - 50 AS ymin, - 100 AS ymax, - 'June' AS colour + PLACE rect SETTING + xmin => '1973-06-01', + xmax => '1973-06-30', + ymin => 50, + ymax => 100, + colour => 'dodgerblue' DRAW line MAPPING Date AS x, Temp AS y ``` \ No newline at end of file diff --git a/doc/syntax/layer/type/rule.qmd b/doc/syntax/layer/type/rule.qmd index ac3a0158..dbfacd72 100644 --- a/doc/syntax/layer/type/rule.qmd +++ b/doc/syntax/layer/type/rule.qmd @@ -40,7 +40,7 @@ WHERE Month = 5 VISUALISE DRAW line MAPPING date AS x, temperature AS y - DRAW rule MAPPING 70 AS y + PLACE rule SETTING y => 70 ``` Add a vertical line to mark a specific value: @@ -48,10 +48,10 @@ Add a vertical line to mark a specific value: ```{ggsql} VISUALISE FROM ggsql:penguins DRAW point MAPPING bill_len AS x, bill_dep AS y - DRAW rule MAPPING 45 AS x + PLACE rule SETTING x => 45 ``` -Add multiple threshold lines with different colors: +Add multiple threshold lines with different colors. Note that because we're mapping from data, we use the `DRAW` clause instead of the `PLACE` clause. ```{ggsql} WITH thresholds AS ( diff --git a/doc/syntax/layer/type/text.qmd b/doc/syntax/layer/type/text.qmd index a84bc2df..0e601c5b 100644 --- a/doc/syntax/layer/type/text.qmd +++ b/doc/syntax/layer/type/text.qmd @@ -137,3 +137,13 @@ VISUALISE island AS x, n AS y SCALE y FROM [0, 200] ``` +You can use `PLACE` to annotate a plot directly without needing to map data. 
+ +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins + DRAW point MAPPING species AS colour + PLACE text SETTING + label => ['Adelie', 'Chinstrap', 'Gentoo'], + x => [40, 50, 50], + y => [19, 19, 15] +``` \ No newline at end of file diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index 40d5306c..ecd49865 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -8,6 +8,7 @@ { "include": "#numbers" }, { "include": "#sql-functions" }, { "include": "#draw-clause" }, + { "include": "#place-clause" }, { "include": "#scale-clause" }, { "include": "#facet-clause" }, { "include": "#project-clause" }, @@ -217,7 +218,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "include": "#comments" }, { "include": "#strings" }, @@ -290,7 +291,21 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "patterns": [ + { + "name": "support.type.geom.ggsql", + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|linear|errorbar)\\b" + }, + { "include": "#common-clause-patterns" } + ] + }, + "place-clause": { + "begin": "(?i)\\b(PLACE)\\b", + "beginCaptures": { + "1": { "name": "keyword.other.ggsql" } + }, + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.geom.ggsql", @@ -304,7 +319,7 @@ "beginCaptures": { "1": { "name": 
"keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "keyword.control.scale-modifier.ggsql", @@ -330,7 +345,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.property.ggsql", @@ -352,7 +367,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.project.ggsql", @@ -370,7 +385,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.property.ggsql", @@ -384,7 +399,7 @@ "beginCaptures": { "1": { "name": "keyword.other.ggsql" } }, - "end": "(?i)(?=\\b(DRAW|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", + "end": "(?i)(?=\\b(DRAW|PLACE|SCALE|PROJECT|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ { "name": "support.type.theme.ggsql", diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 5bf26c40..0f731812 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -3,12 +3,12 @@ //! This module handles determining which columns need type casting based on //! scale requirements and updating type info accordingly. 
+use crate::naming; use crate::plot::scale::coerce_dtypes; -use crate::plot::{CastTargetType, Layer, ParameterValue, Plot}; +use crate::plot::{CastTargetType, Plot}; use crate::reader::SqlDialect; -use crate::{naming, DataSource}; use polars::prelude::{DataType, TimeUnit}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use super::schema::TypeInfo; @@ -23,24 +23,6 @@ pub struct TypeRequirement { pub sql_type_name: String, } -/// Format a literal value as SQL -pub fn literal_to_sql(lit: &ParameterValue) -> String { - match lit { - ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), - ParameterValue::Number(n) => n.to_string(), - ParameterValue::Boolean(b) => { - if *b { - "TRUE".to_string() - } else { - "FALSE".to_string() - } - } - ParameterValue::Array(_) | ParameterValue::Null => { - unreachable!("Grammar prevents arrays and null in literal aesthetic mappings") - } - } -} - /// Determine which columns need casting based on scale requirements. /// /// For each layer, collects columns that need casting to match the scale's @@ -202,32 +184,3 @@ pub fn update_type_info_for_casting(type_info: &mut [TypeInfo], requirements: &[ } } } - -/// Determine the data source table name for a layer. 
-/// -/// Returns the table/CTE name to query from: -/// - Layer with explicit source (CTE, table, file) → that source name -/// - Layer using global data → global table name -pub fn determine_layer_source( - layer: &Layer, - materialized_ctes: &HashSet, - has_global: bool, -) -> String { - match &layer.source { - Some(DataSource::Identifier(name)) => { - if materialized_ctes.contains(name) { - naming::cte_table(name) - } else { - name.clone() - } - } - Some(DataSource::FilePath(path)) => { - format!("'{}'", path) - } - None => { - // Layer uses global data - caller must ensure has_global is true - debug_assert!(has_global, "Layer has no source and no global data"); - naming::global_table() - } - } -} diff --git a/src/execute/layer.rs b/src/execute/layer.rs index c7edea2c..1c67dce3 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -14,24 +14,48 @@ use crate::{naming, DataFrame, GgsqlError, Result}; use polars::prelude::DataType; use std::collections::{HashMap, HashSet}; -use super::casting::{literal_to_sql, TypeRequirement}; +use super::casting::TypeRequirement; use super::schema::build_aesthetic_schema; /// Build the source query for a layer. /// -/// Returns `SELECT * FROM source` where source is either: -/// - The layer's explicit source (table, CTE, file) -/// - The global table if layer has no explicit source +/// Returns a complete query that can be executed to retrieve the layer's data: +/// - Annotation layers → VALUES clause with all aesthetic columns (modifies layer mappings) +/// - Table/CTE layers → `SELECT * FROM table_or_cte` +/// - File path layers → `SELECT * FROM 'path'` +/// - Layers without explicit source → `SELECT * FROM __ggsql_global__` /// -/// Note: This is distinct from `build_layer_base_query()` which builds a full -/// SELECT with aesthetic column renames and type casts. 
+/// For annotation layers, this function processes parameters and converts them to +/// Column/AnnotationColumn mappings, so the layer is modified in place. pub fn layer_source_query( - layer: &Layer, + layer: &mut Layer, materialized_ctes: &HashSet, has_global: bool, -) -> String { - let source = super::casting::determine_layer_source(layer, materialized_ctes, has_global); - format!("SELECT * FROM {}", source) +) -> Result { + match &layer.source { + Some(crate::DataSource::Annotation) => { + // Annotation layers: process parameters and return complete VALUES clause (with on-the-fly recycling) + process_annotation_layer(layer) + } + Some(crate::DataSource::Identifier(name)) => { + // Regular table or CTE + let source = if materialized_ctes.contains(name) { + naming::cte_table(name) + } else { + name.clone() + }; + Ok(format!("SELECT * FROM {}", source)) + } + Some(crate::DataSource::FilePath(path)) => { + // File path source + Ok(format!("SELECT * FROM '{}'", path)) + } + None => { + // Layer uses global data + debug_assert!(has_global, "Layer has no source and no global data"); + Ok(format!("SELECT * FROM {}", naming::global_table())) + } + } } /// Build the SELECT list for a layer query with aesthetic-renamed columns and casting. @@ -78,7 +102,7 @@ pub fn build_layer_select_list( for (aesthetic, value) in &layer.mappings.aesthetics { let aes_col_name = naming::aesthetic_column(aesthetic); let select_expr = match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. 
} | AestheticValue::AnnotationColumn { name } => { // Check if this column needs casting if let Some(req) = cast_map.get(name.as_str()) { // Cast and rename to prefixed aesthetic name @@ -93,7 +117,7 @@ pub fn build_layer_select_list( } AestheticValue::Literal(lit) => { // Literals become columns with prefixed aesthetic name - format!("{} AS \"{}\"", literal_to_sql(lit), aes_col_name) + format!("{} AS \"{}\"", lit.to_sql(), aes_col_name) } }; @@ -126,7 +150,7 @@ pub fn apply_remappings_post_query(df: DataFrame, layer: &Layer) -> Result { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { // Check if this stat column exists in the DataFrame if df.column(name).is_ok() { df.rename(name, target_col_name.into()).map_err(|e| { @@ -168,15 +192,38 @@ pub fn apply_remappings_post_query(df: DataFrame, layer: &Layer) -> Result polars::prelude::Series { - use polars::prelude::{NamedFrom, Series}; + use crate::plot::ArrayElement; + use polars::prelude::{DataType, NamedFrom, Series, TimeUnit}; match lit { ParameterValue::Number(n) => Series::new(name.into(), vec![*n; len]), - ParameterValue::String(s) => Series::new(name.into(), vec![s.as_str(); len]), + ParameterValue::String(s) => { + // Try to parse as temporal types (DateTime > Date > Time) + match ArrayElement::String(s.clone()).try_as_temporal() { + ArrayElement::DateTime(micros) => Series::new(name.into(), vec![micros; len]) + .cast(&DataType::Datetime(TimeUnit::Microseconds, None)) + .expect("DateTime cast should not fail"), + ArrayElement::Date(days) => Series::new(name.into(), vec![days; len]) + .cast(&DataType::Date) + .expect("Date cast should not fail"), + ArrayElement::Time(nanos) => Series::new(name.into(), vec![nanos; len]) + .cast(&DataType::Time) + .expect("Time cast should not fail"), + ArrayElement::String(_) => { + // Parsing failed, use original string + Series::new(name.into(), vec![s.as_str(); len]) + } + _ => unreachable!("try_as_temporal only returns String or 
temporal types"), + } + } ParameterValue::Boolean(b) => Series::new(name.into(), vec![*b; len]), ParameterValue::Array(_) | ParameterValue::Null => { - unreachable!("Grammar prevents arrays and null in literal aesthetic mappings") + unreachable!("Arrays are never moved to mappings; NULL is filtered in process_annotation_layers()") } } } @@ -287,6 +334,9 @@ pub fn apply_pre_stat_transform( /// 2. Renames columns to aesthetic names (e.g., "Date" AS "__ggsql_aes_x__") /// 3. Applies type casts based on scale requirements /// +/// For annotation layers, the source_query is already the complete VALUES clause, +/// so it's returned as-is (no wrapping, filtering, or casting needed). +/// /// The resulting query can be used for: /// - Schema completion (fetching min/max values) /// - Scale input range resolution @@ -296,8 +346,8 @@ pub fn apply_pre_stat_transform( /// # Arguments /// /// * `layer` - The layer configuration with aesthetic mappings -/// * `source_query` - The base query for the layer's data source -/// * `type_requirements` - Columns that need type casting +/// * `source_query` - The base query for the layer's data source (for annotations, this is already the VALUES clause) +/// * `type_requirements` - Columns that need type casting (not applicable to annotations) /// /// # Returns /// @@ -307,6 +357,10 @@ pub fn build_layer_base_query( source_query: &str, type_requirements: &[TypeRequirement], ) -> String { + // Annotation layers now go through the same pipeline as regular layers. + // The source_query for annotations is a VALUES clause with raw column names, + // and this function wraps it with SELECT expressions that rename to prefixed aesthetic names. + // Build SELECT list with aesthetic renames, casts let select_exprs = build_layer_select_list(layer, type_requirements); let select_clause = if select_exprs.is_empty() { @@ -603,6 +657,159 @@ where Ok(final_query) } +/// Build a VALUES clause for an annotation layer with all aesthetic columns. 
+/// +/// Generates SQL like: `(VALUES (val1_row1, val2_row1), (val1_row2, val2_row2)) AS t(col1, col2)` +/// +/// This function: +/// 1. Moves positional/required/array parameters from layer.parameters to layer.mappings +/// 2. Handles array recycling on-the-fly (determines max length, replicates scalars) +/// 3. Validates that all arrays have compatible lengths (1 or max) +/// 4. Builds the VALUES clause with raw aesthetic column names +/// 5. Converts parameter values to Column/AnnotationColumn mappings +/// +/// For annotation layers: +/// - Positional aesthetics (pos1, pos2): use Column (data coordinate space, participate in scales) +/// - Non-positional aesthetics (color, size): use AnnotationColumn (visual space, identity scale) +/// +/// # Arguments +/// +/// * `layer` - The annotation layer with aesthetics in parameters (will be modified) +/// +/// # Returns +/// +/// A complete SQL expression ready to use as a FROM clause +fn process_annotation_layer(layer: &mut Layer) -> Result { + use crate::plot::ArrayElement; + + // Step 1: Identify which parameters to use for annotation data + // Only process positional aesthetics, required aesthetics, and array parameters + // (non-positional non-required scalars stay in parameters as geom settings) + let required_aesthetics = layer.geom.aesthetics().required(); + let param_keys: Vec = layer.parameters.keys().cloned().collect(); + + // Collect parameters we'll use, checking criteria and filtering NULLs + let mut annotation_params: Vec<(String, ParameterValue)> = Vec::new(); + + for param_name in param_keys { + // Skip if already in mappings + if layer.mappings.contains_key(¶m_name) { + continue; + } + + let Some(value) = layer.parameters.get(¶m_name) else { + continue; + }; + + // Filter out NULL aesthetics - they mean "use geom default" + if value.is_null() { + continue; + } + + // Check if this is a positional aesthetic OR a required aesthetic OR an array + let is_positional = 
crate::plot::aesthetic::is_positional_aesthetic(¶m_name); + let is_required = required_aesthetics.contains(¶m_name.as_str()); + let is_array = matches!(value, ParameterValue::Array(_)); + + // Only process positional/required/array parameters + if is_positional || is_required || is_array { + annotation_params.push((param_name.clone(), value.clone())); + } + } + + // Step 2: Determine max array length from all annotation parameters + let mut max_length = 1; + + for (aesthetic, value) in &annotation_params { + // Only check array values + let ParameterValue::Array(arr) = value else { + continue; + }; + + let len = arr.len(); + if len <= 1 { + continue; + } + + if max_length > 1 && len != max_length { + // Multiple different non-1 lengths - error + return Err(GgsqlError::ValidationError(format!( + "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", + aesthetic, len, max_length + ))); + } + if len > max_length { + max_length = len; + } + } + + // Step 3: Build VALUES clause and create final mappings simultaneously + let mut columns: Vec> = Vec::new(); + let mut column_names = Vec::new(); + + for (aesthetic, param) in &annotation_params { + // Build column data for VALUES clause using rep() to handle scalars and arrays uniformly + let mut column_values = match param.clone().rep(max_length)? 
{ + ParameterValue::Array(arr) => arr, + _ => unreachable!("rep() always returns Array variant"), + }; + + // Try to parse string elements as temporal types (Date/DateTime/Time) + // This ensures literals like '1973-06-01' become Date columns, not String columns + column_values = column_values + .into_iter() + .map(|elem| elem.try_as_temporal()) + .collect(); + + columns.push(column_values); + // Use raw aesthetic names (not prefixed) so annotations go through + // the same column→aesthetic renaming pipeline as regular layers + column_names.push(aesthetic.clone()); + + // Create final mapping directly (no intermediate Literal step) + let is_positional = crate::plot::aesthetic::is_positional_aesthetic(aesthetic); + let mapping_value = if is_positional { + // Positional aesthetics use Column (participate in scales) + AestheticValue::Column { + name: aesthetic.clone(), // Raw aesthetic name from VALUES clause + original_name: None, + is_dummy: false, + } + } else { + // Non-positional aesthetics use AnnotationColumn (identity scale) + AestheticValue::AnnotationColumn { + name: aesthetic.clone(), // Raw aesthetic name from VALUES clause + } + }; + + layer.mappings.insert(aesthetic.clone(), mapping_value); + // Remove from parameters now that it's in mappings + layer.parameters.remove(aesthetic); + } + + // Step 4: Build VALUES rows + let mut rows = Vec::new(); + for i in 0..max_length { + let values: Vec = columns.iter().map(|col| col[i].to_sql()).collect(); + rows.push(format!("({})", values.join(", "))); + } + + // Step 5: Build complete SQL query + let values_clause = rows.join(", "); + let column_list = column_names + .iter() + .map(|c| format!("\"{}\"", c)) + .collect::>() + .join(", "); + + let sql = format!( + "SELECT * FROM (VALUES {}) AS t({})", + values_clause, column_list + ); + + Ok(sql) +} + /// Normalize mapping column names to match their aesthetic keys after flip-back. 
/// /// After flipping positional aesthetics, the mapping values (column names) may not match the keys. @@ -673,3 +880,230 @@ pub fn resolve_orientations( } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::plot::{ArrayElement, DataSource, Geom, Layer, ParameterValue}; + + #[test] + fn test_annotation_single_scalar() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + // Put values in parameters (not mappings) - process_annotation_layer will process them + layer + .parameters + .insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Test".to_string()), + ); + + let result = process_annotation_layer(&mut layer).unwrap(); + + // Should produce: SELECT * FROM (VALUES (...)) AS t("pos1", "pos2", "label") + // Uses raw aesthetic names, not prefixed names + assert!(result.contains("VALUES")); + // Check all values are present (order may vary due to HashMap) + assert!(result.contains("5")); + assert!(result.contains("10")); + assert!(result.contains("'Test'")); + // Raw aesthetic names in column list + assert!(result.contains("\"pos1\"")); + assert!(result.contains("\"pos2\"")); + assert!(result.contains("\"label\"")); + + // After processing, mappings should have Column/AnnotationColumn values + assert!(layer.mappings.contains_key("pos1")); + assert!(layer.mappings.contains_key("pos2")); + assert!(layer.mappings.contains_key("label")); + } + + #[test] + fn test_annotation_array_recycling() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ArrayElement::Number(3.0), + ]), + ); + layer + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + 
layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Same".to_string()), + ); + + let result = process_annotation_layer(&mut layer).unwrap(); + + // Should recycle scalar pos2 and label to match array length (3) + assert!(result.contains("VALUES")); + // Check that all values appear (order may vary due to HashMap) + assert!(result.contains("1") && result.contains("2") && result.contains("3")); + assert!(result.contains("10")); + assert!(result.contains("'Same'")); + // Check row count by counting parentheses pairs in VALUES + assert_eq!(result.matches("), (").count() + 1, 3, "Should have 3 rows"); + } + + #[test] + fn test_annotation_mismatched_arrays() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ + ArrayElement::Number(1.0), + ArrayElement::Number(2.0), + ArrayElement::Number(3.0), + ]), + ); + layer.parameters.insert( + "pos2".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(10.0), ArrayElement::Number(20.0)]), + ); + + let result = process_annotation_layer(&mut layer); + + // Should error with mismatched lengths + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("mismatched array lengths"), + "Error message should mention mismatched arrays" + ); + // Error should mention one of the aesthetics (order may vary) + assert!( + err_msg.contains("pos1") || err_msg.contains("pos2"), + "Error message should mention at least one aesthetic" + ); + } + + #[test] + fn test_annotation_multiple_arrays_same_length() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer.parameters.insert( + "pos1".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(1.0), ArrayElement::Number(2.0)]), + ); + layer.parameters.insert( + "pos2".to_string(), + ParameterValue::Array(vec![ArrayElement::Number(10.0), 
ArrayElement::Number(20.0)]), + ); + + let result = process_annotation_layer(&mut layer).unwrap(); + + // Both arrays have length 2, should work (order may vary) + assert!(result.contains("VALUES")); + assert!(result.contains("1") && result.contains("2")); + assert!(result.contains("10") && result.contains("20")); + assert_eq!(result.matches("), (").count() + 1, 2, "Should have 2 rows"); + } + + #[test] + fn test_annotation_mixed_types() { + let mut layer = Layer::new(Geom::text()); + layer.source = Some(DataSource::Annotation); + layer + .parameters + .insert("pos1".to_string(), ParameterValue::Number(5.0)); + layer + .parameters + .insert("pos2".to_string(), ParameterValue::Number(10.0)); + layer.parameters.insert( + "label".to_string(), + ParameterValue::String("Text".to_string()), + ); + + let result = process_annotation_layer(&mut layer).unwrap(); + + // Should handle different types (order may vary) + assert!(result.contains("VALUES")); + assert!(result.contains("5")); + assert!(result.contains("10")); + assert!(result.contains("'Text'")); + } + + #[test] + fn test_literal_to_series_date_parsing() { + use polars::prelude::DataType; + + // Date literal should parse to Date type + let series = literal_to_series( + "date_col", + &ParameterValue::String("1973-06-01".to_string()), + 5, + ); + assert_eq!( + series.dtype(), + &DataType::Date, + "Date string should parse to Date type" + ); + assert_eq!(series.len(), 5); + } + + #[test] + fn test_literal_to_series_datetime_parsing() { + use polars::prelude::{DataType, TimeUnit}; + + // DateTime literal should parse to Datetime type + let series = literal_to_series( + "dt_col", + &ParameterValue::String("2024-03-17T14:30:00".to_string()), + 3, + ); + assert!( + matches!( + series.dtype(), + DataType::Datetime(TimeUnit::Microseconds, None) + ), + "DateTime string should parse to Datetime type" + ); + assert_eq!(series.len(), 3); + } + + #[test] + fn test_literal_to_series_time_parsing() { + use 
polars::prelude::DataType; + + // Time literal should parse to Time type + let series = literal_to_series( + "time_col", + &ParameterValue::String("14:30:00".to_string()), + 4, + ); + assert_eq!( + series.dtype(), + &DataType::Time, + "Time string should parse to Time type" + ); + assert_eq!(series.len(), 4); + } + + #[test] + fn test_literal_to_series_string_fallback() { + use polars::prelude::DataType; + + // Non-temporal string should remain String type + let series = literal_to_series( + "text_col", + &ParameterValue::String("not a date".to_string()), + 2, + ); + assert_eq!( + series.dtype(), + &DataType::String, + "Non-temporal string should remain String type" + ); + assert_eq!(series.len(), 2); + } +} diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 5ed2e65b..c8c7475f 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -28,7 +28,7 @@ use crate::plot::aesthetic::{is_positional_aesthetic, AestheticContext}; use crate::plot::facet::{resolve_properties as resolve_facet_properties, FacetDataContext}; use crate::plot::layer::is_transposed; use crate::plot::{AestheticValue, Layer, Scale, ScaleTypeKind, Schema}; -use crate::{DataFrame, GgsqlError, Plot, Result}; +use crate::{DataFrame, DataSource, GgsqlError, Plot, Result}; use std::collections::{HashMap, HashSet}; use crate::reader::Reader; @@ -163,6 +163,11 @@ fn is_null_sentinel(value: &AestheticValue) -> bool { fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema]) { for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { + // Skip annotation layers - they don't inherit global mappings + if matches!(layer.source, Some(DataSource::Annotation)) { + continue; + } + let supported = layer.geom.aesthetics().supported(); let schema_columns: HashSet<&str> = schema.iter().map(|c| c.name.as_str()).collect(); @@ -962,11 +967,12 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result = specs[0] .layers - .iter() + .iter_mut() 
.map(|l| layer::layer_source_query(l, &materialized_ctes, has_global_table)) - .collect(); + .collect::>>()?; // Get types for each layer from source queries (Phase 1: types only, no min/max yet) let mut layer_type_info: Vec> = Vec::new(); @@ -2317,6 +2323,300 @@ mod tests { ); } + #[cfg(feature = "duckdb")] + #[test] + fn test_place_annotation_layer() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create test data + reader + .connection() + .execute( + "CREATE TABLE test_place AS SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y)", + duckdb::params![], + ) + .unwrap(); + + let query = r#" + SELECT * FROM test_place + VISUALISE x, y + DRAW point + PLACE text SETTING x => 2, y => 25, label => 'Annotation', fontsize => 14 + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + assert_eq!(result.specs.len(), 1); + assert_eq!( + result.specs[0].layers.len(), + 2, + "Should have DRAW + PLACE layers" + ); + + // First layer: regular DRAW point + let point_layer = &result.specs[0].layers[0]; + assert_eq!(point_layer.geom, crate::Geom::point()); + assert!( + point_layer.source.is_none(), + "DRAW layer should have no explicit source" + ); + + // Second layer: PLACE text annotation + let annotation_layer = &result.specs[0].layers[1]; + assert_eq!(annotation_layer.geom, crate::Geom::text()); + assert!( + matches!(annotation_layer.source, Some(DataSource::Annotation)), + "PLACE layer should have Annotation source" + ); + + // Verify annotation layer has 1-row data + let annotation_key = annotation_layer.data_key.as_ref().unwrap(); + let annotation_df = result.data.get(annotation_key).unwrap(); + assert_eq!( + annotation_df.height(), + 1, + "Annotation layer should have exactly 1 row" + ); + + // Verify positional aesthetics are moved from SETTING to mappings with transformed names + // They become Column references (not Literals) so they can participate in scale training + assert!( + matches!( + 
annotation_layer.mappings.get("pos1"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos1__" + ), + "x should be transformed to pos1, moved to mappings, and materialized as column" + ); + assert!( + matches!( + annotation_layer.mappings.get("pos2"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos2__" + ), + "y should be transformed to pos2, moved to mappings, and materialized as column" + ); + + // Verify required non-positional aesthetic (label) is in mappings as AnnotationColumn + // After process_annotation_layer, required aesthetics are converted to AnnotationColumn + assert!( + matches!( + annotation_layer.mappings.get("label"), + Some(AestheticValue::AnnotationColumn { name }) if name == "__ggsql_aes_label__" + ), + "label (required) should be in mappings as AnnotationColumn with prefixed name" + ); + + // Non-required, non-positional, non-array aesthetics like size may be processed + // by resolve_aesthetics or other downstream logic, so we don't strictly check + // where they end up. The key point is that required/positional aesthetics are + // correctly moved to mappings. + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_annotation_with_stat_geom() { + // Test that annotation layers work with stat geoms (e.g., histogram) + // This was previously broken due to naming conflicts: + // - Annotation layers created __ggsql_aes_pos1__ directly + // - Stat transforms tried to rename __ggsql_stat_bin → __ggsql_aes_pos1__ (conflict!) 
+ // Now fixed: annotations use raw names (pos1), go through normal renaming pipeline + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + VISUALISE + PLACE histogram SETTING x => [1.2, 2.5, 3.1, 2.8, 1.9, 2.2, 3.5, 2.1, 1.8, 2.9], bins => 5 + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + assert_eq!(result.specs.len(), 1); + assert_eq!( + result.specs[0].layers.len(), + 1, + "Should have one PLACE layer" + ); + + let histogram_layer = &result.specs[0].layers[0]; + assert_eq!(histogram_layer.geom, crate::Geom::histogram()); + assert!( + matches!(histogram_layer.source, Some(DataSource::Annotation)), + "PLACE layer should have Annotation source" + ); + + // After stat transform, pos1 should be remapped to bin + assert!( + histogram_layer.mappings.contains_key("pos1"), + "Histogram should have pos1 aesthetic (bin start)" + ); + assert!( + histogram_layer.mappings.contains_key("pos1end"), + "Histogram should have pos1end aesthetic (bin end)" + ); + assert!( + histogram_layer.mappings.contains_key("pos2"), + "Histogram should have pos2 aesthetic (count)" + ); + + // Verify the data has binned results + let histogram_key = histogram_layer.data_key.as_ref().unwrap(); + let histogram_df = result.data.get(histogram_key).unwrap(); + + assert!( + histogram_df.height() > 0, + "Histogram should produce binned data" + ); + assert!( + histogram_df.height() <= 5, + "Histogram with 5 bins should produce at most 5 rows" + ); + + // Verify the binned data has the expected columns + assert!( + histogram_df.column("__ggsql_aes_pos1__").is_ok(), + "Should have bin start column" + ); + assert!( + histogram_df.column("__ggsql_aes_pos1end__").is_ok(), + "Should have bin end column" + ); + assert!( + histogram_df.column("__ggsql_aes_pos2__").is_ok(), + "Should have count column" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_missing_required_aesthetic() { + let reader = 
DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, label => 'Missing y!' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_err(), "Should fail validation"); + + match result { + Err(GgsqlError::ValidationError(msg)) => { + assert!( + msg.contains("pos2"), + "Error should mention missing pos2 aesthetic: {}", + msg + ); + } + Err(e) => panic!("Expected ValidationError, got: {}", e), + Ok(_) => panic!("Expected error, got success"), + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_affects_scale_ranges() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Data has x from 1-3, but PLACE annotation at x=10 should extend the range + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL + SELECT 2 AS x, 20 AS y UNION ALL + SELECT 3 AS x, 30 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 10, y => 50, label => 'Extended' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_ok(), "Query should execute: {:?}", result.err()); + + let prep = result.unwrap(); + + // Check that x scale input_range includes both data range (1-3) and annotation point (10) + let x_scale = prep.specs[0].find_scale("pos1"); + assert!(x_scale.is_some(), "Should have x scale"); + + let scale = x_scale.unwrap(); + if let Some(input_range) = &scale.input_range { + // Input range should include the annotation x value (10) + // The scale input_range is resolved from the combined data + annotation DataFrames + assert!( + input_range.len() >= 2, + "Scale input_range should have min/max values" + ); + // Check that the range max is at least 10 (the annotation x value) + if let Some(max_val) = input_range.last() { + match max_val { + crate::plot::types::ArrayElement::Number(n) => { + assert!( + *n >= 10.0, + "Scale input_range max should include annotation point at 
x=10, got: {}", + n + ); + } + _ => panic!("Expected numeric input_range value"), + } + } + } else { + panic!("Scale should have an input_range"); + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_no_global_mapping_inheritance() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y, 'red' AS color + VISUALISE x, y, color + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Test' + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // DRAW layer should have inherited global color mapping + let point_layer = &result.specs[0].layers[0]; + assert!( + point_layer.mappings.contains_key("color") + || point_layer.mappings.contains_key("fill") + || point_layer.mappings.contains_key("stroke"), + "DRAW layer should inherit color from global mappings" + ); + + // PLACE layer should NOT have inherited global mappings + let annotation_layer = &result.specs[0].layers[1]; + assert!( + !annotation_layer.mappings.contains_key("color"), + "PLACE layer should not inherit color from global mappings" + ); + + // PLACE layer should have geom default fill='black', not global color='red' + assert!( + annotation_layer.mappings.contains_key("fill"), + "PLACE layer should have default fill from text geom" + ); + match annotation_layer.mappings.aesthetics.get("fill") { + Some(AestheticValue::Literal(crate::plot::types::ParameterValue::String(s))) + if s == "black" => + { + // Correct: geom default fill + } + Some(AestheticValue::Column { name, .. 
}) if name == "color" => { + panic!("PLACE layer incorrectly inherited global color mapping as fill"); + } + other => { + panic!("Expected fill=Literal('black'), got: {:?}", other); + } + } + + assert!( + !annotation_layer.mappings.contains_key("stroke"), + "PLACE layer should not have stroke (text geom default is null)" + ); + } #[cfg(feature = "duckdb")] #[test] fn test_null_mapping_removes_global_aesthetic() { diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 6f335c37..2ef33abe 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -558,7 +558,8 @@ pub fn find_schema_columns_for_aesthetic( for aes_name in &aesthetics_to_check { if let Some(value) = layer.mappings.get(aes_name) { match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. } + | AestheticValue::AnnotationColumn { name } => { if let Some(info) = schema.iter().find(|c| c.name == *name) { infos.push(info.clone()); } @@ -1066,10 +1067,12 @@ pub fn find_columns_for_aesthetic<'a>( if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { + // Regular columns (data and positional annotations) participate in scale training if let Ok(column) = df.column(name) { column_refs.push(column); } } + // AnnotationColumn and Literal don't participate in scale training } } } diff --git a/src/execute/schema.rs b/src/execute/schema.rs index 568c7e81..ad80aa7f 100644 --- a/src/execute/schema.rs +++ b/src/execute/schema.rs @@ -6,9 +6,9 @@ //! 2. Apply casting to queries //! 3. 
complete_schema_ranges() - get min/max from cast queries -use crate::plot::{AestheticValue, ColumnInfo, Layer, ParameterValue, Schema}; +use crate::plot::{AestheticValue, ArrayElement, ColumnInfo, Layer, ParameterValue, Schema}; use crate::{naming, DataFrame, Result}; -use polars::prelude::DataType; +use polars::prelude::{DataType, TimeUnit}; /// Simple type info tuple: (name, dtype, is_discrete) pub type TypeInfo = (String, DataType, bool); @@ -246,16 +246,37 @@ pub fn add_literal_columns_to_type_info(layers: &[Layer], layer_type_info: &mut for (layer, type_info) in layers.iter().zip(layer_type_info.iter_mut()) { for (aesthetic, value) in &layer.mappings.aesthetics { if let AestheticValue::Literal(lit) = value { - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } + ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Skip 
null literals - they don't create columns + continue; + } }; - let is_discrete = - matches!(lit, ParameterValue::String(_) | ParameterValue::Boolean(_)); let col_name = naming::aesthetic_column(aesthetic); // Only add if not already present @@ -280,7 +301,7 @@ pub fn build_aesthetic_schema(layer: &Layer, schema: &Schema) -> Schema { for (aesthetic, value) in &layer.mappings.aesthetics { let aes_col_name = naming::aesthetic_column(aesthetic); match value { - AestheticValue::Column { name, .. } => { + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { // The schema already has aesthetic-prefixed column names from build_layer_base_query, // so we look up by aesthetic name, not the original column name. // Fall back to original name for backwards compatibility with older schemas. @@ -310,21 +331,41 @@ pub fn build_aesthetic_schema(layer: &Layer, schema: &Schema) -> Schema { } AestheticValue::Literal(lit) => { // Literals become columns with appropriate type - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } 
+ ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Null: default to Float64 + (DataType::Float64, false) + } }; aesthetic_schema.push(ColumnInfo { name: aes_col_name, dtype, - is_discrete: matches!( - lit, - ParameterValue::String(_) | ParameterValue::Boolean(_) - ), + is_discrete, min: None, max: None, }); diff --git a/src/lib.rs b/src/lib.rs index 7c6a649f..62761387 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -758,6 +758,73 @@ mod integration_tests { ); } + #[test] + fn test_end_to_end_place_field_vs_value_encoding() { + // Test that PLACE annotation layers render correctly: + // - Positional aesthetics (x, y) as field encodings (reference columns) + // - Non-positional aesthetics (size, stroke) as value encodings (datum values) + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL SELECT 2 AS x, 20 AS y + VISUALISE x, y + DRAW line + PLACE point SETTING x => 5, y => 30, size => 100, stroke => 'red' + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + + // Render to Vega-Lite + let writer = VegaLiteWriter::new(); + let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Find the point annotation layer (should be second layer) + let point_layer = &vl_spec["layer"][1]; + + // Mark can be either a string or an object with type + let mark_type = if point_layer["mark"].is_string() { + point_layer["mark"].as_str().unwrap() + } else { + point_layer["mark"]["type"].as_str().unwrap() + }; + assert_eq!( + mark_type, "point", + "Second layer should be point annotation" + ); + + let encoding = &point_layer["encoding"]; + + // Positional 
aesthetics should be field encodings (have "field" key) + assert!( + encoding["x"]["field"].is_string(), + "x should be a field encoding: {:?}", + encoding["x"] + ); + assert!( + encoding["y"]["field"].is_string(), + "y should be a field encoding: {:?}", + encoding["y"] + ); + + // Non-positional aesthetics should be value encodings (have "value" key) + assert!( + encoding["size"]["value"].is_number(), + "size should be a value encoding with numeric value: {:?}", + encoding["size"] + ); + + // Note: stroke color goes through resolve_aesthetics and may be in different location + // Just verify it's present somewhere as a literal + let has_stroke_value = encoding + .get("stroke") + .and_then(|s| s.get("value")) + .is_some() + || point_layer["mark"].get("stroke").is_some(); + assert!(has_stroke_value, "stroke should be present as a value"); + } + #[test] fn test_end_to_end_global_constant_in_visualise() { // Test that global constants in VISUALISE clause work correctly diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 74e5edd0..19121ca4 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -294,6 +294,10 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // since geom definitions use internal names for their supported/required aesthetics spec.transform_aesthetics_to_internal(); + // Note: Annotation layer processing (moving parameters to mappings) now happens + // during execution in process_annotation_layer(), not during parsing. + // This keeps all annotation-specific logic in one place. 
+ Ok(spec) } @@ -306,6 +310,10 @@ fn process_viz_clause(node: &Node, source: &SourceTree, spec: &mut Plot) -> Resu let layer = build_layer(&child, source)?; spec.layers.push(layer); } + "place_clause" => { + let layer = build_place_layer(&child, source)?; + spec.layers.push(layer); + } "scale_clause" => { let scale = build_scale(&child, source)?; spec.scales.push(scale); @@ -507,6 +515,22 @@ fn build_layer(node: &Node, source: &SourceTree) -> Result { Ok(layer) } +/// Build an annotation Layer from a place_clause node +/// This is similar to build_layer but marks it as an annotation layer. +/// The transformation of positional/required aesthetics from SETTING to mappings +/// happens later in Plot::transform_aesthetics_to_internal(). +/// Syntax: PLACE geom [MAPPING col AS x, ...] [SETTING param => val, ...] [FILTER condition] +fn build_place_layer(node: &Node, source: &SourceTree) -> Result { + // Build the layer using standard logic + let mut layer = build_layer(node, source)?; + + // Mark as annotation layer + // Array recycling happens later during SQL generation in process_annotation_layer() + layer.source = Some(DataSource::Annotation); + + Ok(layer) +} + /// Parse a setting_clause: SETTING param => value, ... 
fn parse_setting_clause( node: &Node, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a3d8676c..dd2c28c8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -413,4 +413,60 @@ mod tests { Some(DataSource::Identifier("cte".to_string())) ); } + + #[test] + fn test_place_clause() { + let query = r#" + SELECT x, y FROM data + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Hello' + "#; + + let result = parse_query(query); + assert!(result.is_ok(), "Failed to parse PLACE clause: {:?}", result); + + let specs = result.unwrap(); + assert_eq!(specs.len(), 1); + assert_eq!(specs[0].layers.len(), 2, "Expected 2 layers (DRAW + PLACE)"); + + // First layer: regular DRAW point + assert_eq!(specs[0].layers[0].geom, Geom::point()); + assert!( + specs[0].layers[0].source.is_none(), + "DRAW layer should have no explicit source" + ); + + // Second layer: PLACE text with annotation source + assert_eq!(specs[0].layers[1].geom, Geom::text()); + assert!( + matches!(specs[0].layers[1].source, Some(DataSource::Annotation)), + "PLACE layer should have Annotation source" + ); + + // After parsing, annotation layer parameters stay in parameters + // They are only moved to mappings during execution (in process_annotation_layer) + // (transform_aesthetics_to_internal runs and transforms x→pos1, y→pos2) + assert_eq!( + specs[0].layers[1].parameters.get("pos1"), + Some(&ParameterValue::Number(5.0)), + "x should be transformed to pos1 but remain in parameters at parse time" + ); + assert_eq!( + specs[0].layers[1].parameters.get("pos2"), + Some(&ParameterValue::Number(10.0)), + "y should be transformed to pos2 but remain in parameters at parse time" + ); + assert_eq!( + specs[0].layers[1].parameters.get("label"), + Some(&ParameterValue::String("Hello".to_string())), + "label (required) also remains in parameters at parse time" + ); + + // Mappings should be empty at parse time for annotation layers + assert!( + specs[0].layers[1].mappings.is_empty(), + 
"Annotation layer mappings should be empty at parse time (populated during execution)" + ); + } } diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index 54759ace..d3fe70d5 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -351,8 +351,12 @@ impl Layer { // Column is now named with the prefixed aesthetic name *name = aes_col_name; } + AestheticValue::AnnotationColumn { name } => { + // AnnotationColumn already has identity scale behavior, just update name + *name = aes_col_name; + } AestheticValue::Literal(_) => { - // Literals are also columns with prefixed aesthetic name + // Literals become standard columns with prefixed aesthetic name // Note: literals don't have an original_name to preserve *value = AestheticValue::standard_column(aes_col_name); } @@ -391,6 +395,13 @@ impl Layer { is_dummy: *is_dummy, } } + AestheticValue::AnnotationColumn { .. } => { + // Annotation columns can be remapped (e.g., stat transforms on annotation data) + // They remain annotation columns (identity scale) + AestheticValue::AnnotationColumn { + name: prefixed_name, + } + } AestheticValue::Literal(_) => { // Literal becomes a column reference after post-query processing // No original_name since it's a constant value diff --git a/src/plot/main.rs b/src/plot/main.rs index 8700562f..031fe957 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -47,6 +47,10 @@ pub use super::projection::{Coord, Projection}; // Re-export Facet types from the facet module pub use super::facet::{Facet, FacetLayout}; +// ============================================================================= +// Plot Type +// ============================================================================= + /// Complete ggsql visualization specification #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Plot { @@ -168,6 +172,7 @@ impl Plot { /// - Global mappings /// - Layer aesthetics /// - Layer remappings + /// - Layer parameters (SETTING aesthetics) /// - Scale aesthetics /// - 
Label keys pub fn transform_aesthetics_to_internal(&mut self) { @@ -176,10 +181,21 @@ impl Plot { // Transform global mappings self.global_mappings.transform_to_internal(&ctx); - // Transform layer aesthetics and remappings + // Transform layer aesthetics, remappings, and parameters for layer in &mut self.layers { layer.mappings.transform_to_internal(&ctx); layer.remappings.transform_to_internal(&ctx); + + // Transform parameter keys from user-facing to internal names + // This ensures position aesthetics in SETTING (e.g., x => 5) become internal (pos1 => 5) + let original_parameters = std::mem::take(&mut layer.parameters); + for (param_name, value) in original_parameters { + let internal_name = ctx + .map_user_to_internal(¶m_name) + .map(|s| s.to_string()) + .unwrap_or(param_name); + layer.parameters.insert(internal_name, value); + } } // Transform scale aesthetics @@ -503,7 +519,7 @@ mod tests { assert!(bar.is_supported("pos1")); // Bar accepts optional pos1 assert_eq!(bar.required(), &[] as &[&str]); // No required aesthetics - // Text geom + // Text geom - requires label let text = Geom::text().aesthetics(); assert!(text.is_supported("label")); assert!(text.is_supported("typeface")); diff --git a/src/plot/types.rs b/src/plot/types.rs index 19a21778..ec28b525 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,6 +140,9 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), + /// Annotation layer (PLACE clause) + /// Row count and array recycling handled during SQL generation + Annotation, } impl DataSource { @@ -148,6 +151,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, + DataSource::Annotation => "__annotation__", } } @@ -155,6 +159,11 @@ impl DataSource { pub fn is_file(&self) -> bool { matches!(self, DataSource::FilePath(_)) } + + /// Returns true if this is an annotation layer source + pub fn is_annotation(&self) -> bool { + matches!(self, 
DataSource::Annotation) + } } // ============================================================================= @@ -164,7 +173,7 @@ impl DataSource { /// Value for aesthetic mappings #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum AestheticValue { - /// Column reference + /// Column reference from data source Column { name: String, /// Original column name before internal renaming (for labels) @@ -174,12 +183,17 @@ pub enum AestheticValue { /// Whether this is a dummy/placeholder column (e.g., for bar charts without x mapped) is_dummy: bool, }, + /// Annotation column for non-positional aesthetics (synthesized from PLACE literals) + /// These columns are generated from user-specified literal values in visual space + /// (e.g., color => 'red', size => 10) and use identity scales (no transformation). + /// Positional annotations (x, y) use Column instead since they're in data coordinate space. + AnnotationColumn { name: String }, /// Literal value (quoted string, number, or boolean) Literal(ParameterValue), } impl AestheticValue { - /// Create a column mapping + /// Create a standard column mapping pub fn standard_column(name: impl Into) -> Self { Self::Column { name: name.into(), @@ -209,10 +223,15 @@ impl AestheticValue { } } + /// Create an annotation column mapping (synthesized from PLACE literals) + pub fn annotation_column(name: impl Into) -> Self { + Self::AnnotationColumn { name: name.into() } + } + /// Get column name if this is a column mapping pub fn column_name(&self) -> Option<&str> { match self { - Self::Column { name, .. } => Some(name), + Self::Column { name, .. } | Self::AnnotationColumn { name } => Some(name), _ => None, } } @@ -229,16 +248,19 @@ impl AestheticValue { original_name, .. } => Some(original_name.as_deref().unwrap_or(name)), + Self::AnnotationColumn { name } => Some(name), _ => None, } } /// Check if this is a dummy/placeholder column pub fn is_dummy(&self) -> bool { - match self { - Self::Column { is_dummy, .. 
} => *is_dummy, - _ => false, - } + matches!(self, Self::Column { is_dummy: true, .. }) + } + + /// Check if this is an annotation column + pub fn is_annotation(&self) -> bool { + matches!(self, Self::AnnotationColumn { .. }) } /// Check if this is a literal value (not a column mapping) @@ -250,7 +272,9 @@ impl AestheticValue { impl std::fmt::Display for AestheticValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - AestheticValue::Column { name, .. } => write!(f, "{}", name), + AestheticValue::Column { name, .. } | AestheticValue::AnnotationColumn { name } => { + write!(f, "{}", name) + } AestheticValue::Literal(lit) => write!(f, "{}", lit), } } @@ -609,6 +633,79 @@ impl ArrayElement { } } + /// Homogenize a slice of array elements to a common type. + /// + /// Infers the target type from all elements, then attempts to coerce all elements + /// to that type. If coercion fails (e.g., string + number), falls back to String type. + /// + /// Returns a new vector with homogenized elements. + pub fn homogenize(values: &[Self]) -> Vec { + // Infer target type from all elements + let Some(target_type) = Self::infer_type(values) else { + // All nulls or empty array - return cloned as-is + return values.to_vec(); + }; + + // Try to coerce all elements to the inferred type + let coerced: Result, _> = values + .iter() + .map(|elem| elem.coerce_to(target_type)) + .collect(); + + match coerced { + Ok(coerced_arr) => coerced_arr, + Err(_) => { + // Coercion failed - fall back to String type + values + .iter() + .map(|elem| { + elem.coerce_to(ArrayElementType::String) + .unwrap_or(Self::Null) + }) + .collect() + } + } + } + + /// Convert this element to a SQL literal string. + /// + /// Used for generating SQL expressions from literal values. 
+ pub fn to_sql(&self) -> String { + match self { + Self::String(s) => format!("'{}'", s.replace('\'', "''")), + Self::Number(n) => n.to_string(), + Self::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + Self::Date(d) => { + // Convert days since epoch to DATE + // Use CAST to ensure result is DATE type, not TIMESTAMP + format!("CAST(DATE '1970-01-01' + INTERVAL {} DAY AS DATE)", d) + } + Self::DateTime(dt) => { + // Convert microseconds since epoch to TIMESTAMP + format!( + "TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", + dt + ) + } + Self::Time(t) => { + // Convert nanoseconds since midnight to TIME + let seconds = t / 1_000_000_000; + let nanos = t % 1_000_000_000; + format!( + "TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", + seconds, nanos + ) + } + Self::Null => "NULL".to_string(), + } + } + /// Get the type name for error messages. fn type_name(&self) -> &'static str { match self { @@ -695,6 +792,39 @@ impl ArrayElement { None } + /// Try to parse string as temporal type (Date/DateTime/Time). + /// + /// If this is a String variant, attempts to parse it as a temporal type + /// in order of specificity: DateTime > Date > Time. + /// If parsing succeeds, returns the temporal variant. Otherwise returns self unchanged. + /// + /// For non-String variants, returns self unchanged. + /// + /// # Example + /// ```ignore + /// let elem = ArrayElement::String("1973-06-01".to_string()); + /// let parsed = elem.try_as_temporal(); + /// // parsed is now ArrayElement::Date(...) 
+ /// ``` + pub fn try_as_temporal(self) -> Self { + if let Self::String(ref s) = self { + // Try DateTime first (most specific) + if let Some(dt) = Self::from_datetime_string(s) { + return dt; + } + // Try Date + if let Some(d) = Self::from_date_string(s) { + return d; + } + // Try Time + if let Some(t) = Self::from_time_string(s) { + return t; + } + } + // Fall back to original value if not a string or parsing failed + self + } + /// Convert to string for HashMap keys and display pub fn to_key_string(&self) -> String { match self { @@ -788,6 +918,76 @@ impl ParameterValue { _ => None, } } + + /// Convert this parameter value to a SQL literal string. + /// + /// Only supports scalar values (String, Number, Boolean, Null). + /// Arrays are handled separately in annotation layer VALUES clause generation. + pub fn to_sql(&self) -> String { + match self { + ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), + ParameterValue::Number(n) => n.to_string(), + ParameterValue::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + ParameterValue::Array(_) => { + panic!("ParameterValue::to_sql() does not support arrays. Arrays in annotation layers should be handled via VALUES clause generation.") + } + ParameterValue::Null => "NULL".to_string(), + } + } + + /// Convert a scalar ParameterValue to an ArrayElement. + /// + /// Panics if called on an Array variant. + fn to_array_element(&self) -> ArrayElement { + match self { + ParameterValue::Number(num) => ArrayElement::Number(*num), + ParameterValue::String(s) => ArrayElement::String(s.clone()), + ParameterValue::Boolean(b) => ArrayElement::Boolean(*b), + ParameterValue::Null => ArrayElement::Null, + ParameterValue::Array(_) => panic!("Cannot convert Array to single ArrayElement"), + } + } + + /// Recycle this value to a target array length. 
+ /// + /// - Scalars (String, Number, Boolean, Null) are converted to arrays with n copies + /// - Arrays of length 1 are recycled to n copies of that element + /// - Arrays of target length are returned as-is (after homogenization) + /// - Arrays of other lengths produce an error + pub fn rep(self, n: usize) -> Result { + match self { + // Arrays: homogenize types if mixed, then recycle if needed + ParameterValue::Array(arr) => { + if arr.len() == 1 { + // Recycle the single element + let element = arr[0].clone(); + Ok(ParameterValue::Array(vec![element; n])) + } else if arr.len() == n { + // Already correct length - homogenize for type consistency + let arr = ArrayElement::homogenize(&arr); + Ok(ParameterValue::Array(arr)) + } else { + // Mismatched length - shouldn't happen if validation passed + Err(crate::GgsqlError::InternalError(format!( + "Attempted to recycle array of length {} to length {} (should have been caught earlier)", + arr.len(), + n + ))) + } + } + // Scalars: convert to ArrayElement and replicate n times + scalar => { + let elem = scalar.to_array_element(); + Ok(ParameterValue::Array(vec![elem; n])) + } + } + } } // ============================================================================= @@ -1326,4 +1526,68 @@ mod tests { let result = elem.coerce_to(ArrayElementType::Date); assert!(result.is_err()); } + + #[test] + fn test_homogenize_mixed_number_string() { + let arr = vec![ + ArrayElement::Number(1.0), + ArrayElement::String("foo".to_string()), + ]; + + let homogenized = ArrayElement::homogenize(&arr); + + // Should fall back to String type since "foo" can't be coerced to Number + assert_eq!(homogenized.len(), 2); + assert!(matches!(homogenized[0], ArrayElement::String(_))); + assert!(matches!(homogenized[1], ArrayElement::String(_))); + + if let ArrayElement::String(s) = &homogenized[0] { + assert_eq!(s, "1"); + } + if let ArrayElement::String(s) = &homogenized[1] { + assert_eq!(s, "foo"); + } + } + + #[test] + fn 
test_try_as_temporal_date() { + let elem = ArrayElement::String("1973-06-01".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Date(_))); + assert_eq!(parsed.to_key_string(), "1973-06-01"); + } + + #[test] + fn test_try_as_temporal_datetime() { + let elem = ArrayElement::String("2024-03-17T14:30:00".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::DateTime(_))); + } + + #[test] + fn test_try_as_temporal_time() { + let elem = ArrayElement::String("14:30:00".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Time(_))); + } + + #[test] + fn test_try_as_temporal_non_temporal_string() { + let elem = ArrayElement::String("not a date".to_string()); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::String(_))); + assert_eq!(parsed.to_key_string(), "not a date"); + } + + #[test] + fn test_try_as_temporal_non_string() { + // Non-string elements should pass through unchanged + let elem = ArrayElement::Number(42.0); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Number(_))); + + let elem = ArrayElement::Boolean(true); + let parsed = elem.try_as_temporal(); + assert!(matches!(parsed, ArrayElement::Boolean(_))); + } } diff --git a/src/writer/vegalite/data.rs b/src/writer/vegalite/data.rs index 22f6c77b..d5dadda7 100644 --- a/src/writer/vegalite/data.rs +++ b/src/writer/vegalite/data.rs @@ -100,16 +100,8 @@ pub(super) fn series_value_at(series: &Series, idx: usize) -> Result { let ca = series .str() .map_err(|e| GgsqlError::WriterError(format!("Failed to cast to string: {}", e)))?; - // Try to parse as number if it looks numeric - if let Some(val) = ca.get(idx) { - if let Ok(num) = val.parse::() { - Ok(json!(num)) - } else { - Ok(json!(val)) - } - } else { - Ok(Value::Null) - } + // Keep strings as strings (don't parse to numbers) + Ok(ca.get(idx).map(|v| json!(v)).unwrap_or(Value::Null)) 
} Date => { // Convert days since epoch to ISO date string: "YYYY-MM-DD" diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index f98043ef..b58619d7 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -802,7 +802,11 @@ pub(super) fn build_encoding_channel( name: col, original_name, is_dummy, - } => build_column_encoding(aesthetic, col, original_name, *is_dummy, ctx), + } => build_column_encoding(aesthetic, col, original_name, *is_dummy, true, ctx), + AestheticValue::AnnotationColumn { name: col } => { + // Non-positional annotation columns use identity scale + build_column_encoding(aesthetic, col, &None, false, false, ctx) + } AestheticValue::Literal(lit) => build_literal_encoding(aesthetic, lit), } } @@ -813,13 +817,14 @@ fn build_column_encoding( col: &str, original_name: &Option, is_dummy: bool, + is_scaled: bool, ctx: &mut EncodingContext, ) -> Result { let aesthetic_ctx = ctx.spec.get_aesthetic_context(); let primary = aesthetic_ctx .primary_internal_positional(aesthetic) .unwrap_or(aesthetic); - let mut identity_scale = false; + let mut identity_scale = !is_scaled; // Determine field type from scale or infer from data let field_type = determine_field_type_for_aesthetic( @@ -937,6 +942,12 @@ fn build_literal_encoding(aesthetic: &str, lit: &ParameterValue) -> Result json!(n), } } + ParameterValue::Array(_) => { + return Err(crate::GgsqlError::WriterError(format!( + "The `{aes}` SETTING must be scalar, not an array.", + aes = aesthetic + ))) + } _ => lit.to_json(), }; Ok(json!({"value": val})) diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 9d7c2def..86d77a49 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -280,6 +280,7 @@ fn build_layer_encoding( let secondary_encoding = match value { AestheticValue::Column { name: col, .. 
} => json!({"field": col}), AestheticValue::Literal(lit) => json!({"value": lit.to_json()}), + AestheticValue::AnnotationColumn { name: col } => json!({"field": col}), }; encoding.insert(channel_name, secondary_encoding); continue; diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 4f06b8a7..f5f01d34 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -462,6 +462,7 @@ module.exports = grammar({ // All the visualization clauses (same as current grammar) viz_clause: $ => choice( $.draw_clause, + $.place_clause, $.scale_clause, $.facet_clause, $.project_clause, @@ -481,6 +482,14 @@ module.exports = grammar({ optional($.order_clause) ), + // PLACE clause - syntax: PLACE geom [SETTING ...] + // For annotation layers with literal values only (no data mappings) + place_clause: $ => seq( + caseInsensitive('PLACE'), + $.geom_type, + optional($.setting_clause) + ), + // REMAPPING clause: maps stat-computed columns to aesthetics // Syntax: REMAPPING count AS y, sum AS size // Reuses mapping_list for parsing - stat names are treated as column references