diff --git a/CLAUDE.md b/CLAUDE.md index b1a43363..d13546ac 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -334,7 +334,7 @@ pub enum Geom { // Statistical geoms Histogram, Density, Smooth, Boxplot, Violin, // Annotation geoms - Text, Label, Segment, Arrow, Rule, Linear, ErrorBar, + Text, Segment, Arrow, Rule, Linear, ErrorBar, } pub enum AestheticValue { @@ -1211,7 +1211,7 @@ Maps data values (columns or literals) to visual aesthetics. Syntax: `value AS a - **Position**: `x`, `y`, `xmin`, `xmax`, `ymin`, `ymax` - **Color**: `color`, `fill`, `stroke`, `opacity` - **Size/Shape**: `size`, `shape`, `linetype`, `linewidth` -- **Text**: `label`, `family`, `fontface` +- **Text**: `label`, `typeface`, `fontweight`, `italic` **Literal vs Column**: diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 36769a9e..313563a2 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -168,8 +168,9 @@ linewidth width height - family - fontface + typeface + fontweight + italic hjust vjust diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index 98149041..cdf77e07 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -25,6 +25,7 @@ There are many different layers to choose from when visualising your data. Some - [`area`](layer/type/area.qmd) is used to display series as an area chart. - [`ribbon`](layer/type/ribbon.qmd) is used to display series extrema. - [`polygon`](layer/type/polygon.qmd) is used to display arbitrary shapes as polygons. +- [`text`](layer/type/text.qmd) is used to render datapoints as text. - [`bar`](layer/type/bar.qmd) creates a bar chart, optionally calculating y from the number of records in each bar. - [`density`](layer/type/density.qmd) creates univariate kernel density estimates, showing the distribution of a variable. - [`violin`](layer/type/violin.qmd) displays a rotated kernel density estimate.
diff --git a/doc/syntax/layer/type/text.qmd b/doc/syntax/layer/type/text.qmd new file mode 100644 index 00000000..a84bc2df --- /dev/null +++ b/doc/syntax/layer/type/text.qmd @@ -0,0 +1,139 @@ +--- +title: "Text" +--- + +> Layers are declared with the [`DRAW` clause](../clause/draw.qmd). Read the documentation for this clause for a thorough description of how to use it. + +The text layer displays rows in the data as text. It can be used as a visualisation itself, or used to annotate a different layer. + +## Aesthetics +The following aesthetics are recognised by the text layer. + +### Required +* Primary axis (e.g. `x`): Position along the primary axis. +* Secondary axis (e.g. `y`): Position along the secondary axis. +* `label` The text to display. + +### Optional +* `stroke` The colour at the contour lines of glyphs. Typically kept blank. +* `fill` The colour of the glyphs. +* `colour` Shorthand for setting `stroke` and `fill` simultaneously. +* `opacity` The opacity of the fill colour. +* `typeface` The typeface to style the lettering. +* `fontsize` The size of the text in points. +* `fontweight` Font weight. Interpretation is writer dependent. Vega-Lite converts everything to 'normal' or 'bold'. Can be one of the following: + * CSS keywords: `'thin'`, `'hairline'`, `'extra-light'`, `'ultra-light'`, `'light'`, `'normal'` (default), `'regular'`, `'lighter'`, `'medium'`, `'semi-bold'`, `'demi-bold'`, `'bold'`, `'bolder'`, `'extra-bold'`, `'ultra-bold'`, `'black'`, `'heavy'` + * Numeric values between 0-1000. +* `italic` Whether text should be italicised. Boolean value (`true` or `false`). +* `hjust` Horizontal justification. Can be a numeric value between 0-1 or one of `"left"`, `"right"` or `"centre"` (default). Interpretation of numeric values is writer-dependent. +* `vjust` Vertical justification. Can be a numeric value between 0-1 or one of `"top"`, `"bottom"` or `"middle"` (default). Interpretation of numeric values is writer-dependent.
+* `rotation` Rotation of the text in degrees. + +## Settings +* `offset` Position offset expressed in absolute points. Can be one of the following: + * a single number that applies both horizontally and vertically + * a numeric array `[h, v]` where the first number is the horizontal offset and the second number is the vertical offset. +* `format` Formatting specifier, see explanation below. +* `position`: Determines the position adjustment to use for the layer (default is `'identity'`) + +### Format +The `format` setting can take a string that will be used in formatting the `label` aesthetic. +The basic syntax for this is that the `label` value will be inserted into any place where `{}` appears. +This means that e.g. `SETTING format => '{} species'` will result in the label "adelie species" for a row where the `label` value is "adelie". +Besides simply inserting the value as-is, it is also possible to apply a formatter to `label` before insertion by naming a formatter inside the curly braces prefixed with `:`. +Known formatters are: + +* `{:Title}` will title-case the value (make the first letter in each word upper case) before insertion, e.g. `SETTING format => '{:Title} species'` will become "Adelie species" for the "adelie" label. +* `{:UPPER}` will make the value upper-case, e.g. `SETTING format => '{:UPPER} species'` will become "ADELIE species" for the "adelie" label. +* `{:lower}` works much like `{:UPPER}` but changes the value to lower-case instead. +* `{:time ...}` will format a date/datetime/time value according to the format defined afterwards. The formatting follows strftime format using the Rust chrono library. You can see an overview of the supported syntax at the [chrono docs](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). The basic usage is `SETTING format => '{:time %B %Y}'` which would format a value at 2025-07-04 as "July 2025". +* `{:num ...}` will format a numeric value according to the format defined afterwards.
The format follows the printf format using the Rust sprintf library. The syntax is `%[flags][width][.precision]type` with the following meaning: + - `flags`: One or more modifiers: + * `-`: left-justify + * `+`: Force sign for positive numbers + * ` `: (space) Space before positive numbers + * `0`: Zero-pad + * `#`: Alternate form (`0x` prefix for hex, etc) + - `width`: The minimum width of characters to render. Depending on the `flags` the string will be padded to be at least this width + - `precision`: The maximum precision of the number. For `%g`/`%G` it is the total number of digits whereas for the rest it is the number of digits to the right of the decimal point + - `type`: How to present the number. One of: + * `d`/`i`: Signed decimal integers + * `u`: Unsigned decimal integers + * `f`/`F`: Decimal floating point + * `e`/`E`: Scientific notation + * `g`/`G`: Shortest form of `e` and `f` + * `o`: Unsigned octal + * `x`/`X`: Unsigned hexadecimal + +## Data transformation +The text layer does not transform its data but passes it through unchanged. + +## Orientation +The text layer has no orientation. The axes are treated symmetrically. + +## Examples + +Basic usage: drawing data points as labels. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text MAPPING island AS label +``` + +You can use the `format` setting to tweak the display of the label. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text + MAPPING island AS label + SETTING format => '{:UPPER}' +``` + +Setting font properties. Colours are typically mapped to the fill. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text + MAPPING + island AS label, + species AS fill, + flipper_len AS fontsize + SETTING + opacity => 0.8, + fontweight => 'bold', + typeface => 'Times New Roman' + SCALE fontsize TO [6, 20] +``` + +The 'stroke' aesthetic is applied to the outline of the text.
+ +```{ggsql} +SELECT 1 as x, 1 as y +VISUALISE x, y, 'My Label' AS label +DRAW text + SETTING fontsize => 30, stroke => 'red' +``` + +Labelling precomputed bars with the data value. + +```{ggsql} +SELECT island, COUNT(*) AS n FROM ggsql:penguins GROUP BY island +VISUALISE island AS x, n AS y + DRAW bar + DRAW text + MAPPING n AS label + SETTING vjust => 'top', offset => [0, -11], fill => 'white' +``` + +If you label bars at the extreme end, you may need to expand the scale to accommodate the labels. + +```{ggsql} +SELECT island, COUNT(*) AS n FROM ggsql:penguins GROUP BY island +VISUALISE island AS x, n AS y + DRAW bar + DRAW text + MAPPING n AS label + SETTING vjust => 'bottom', offset => [0, 11] + SCALE y FROM [0, 200] +``` + diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index 55a37530..e9023981 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -249,7 +249,7 @@ "patterns": [ { "name": "support.type.aesthetic.ggsql", - "match": "\\b(x|y|xmin|xmax|ymin|ymax|xend|yend|weight|color|colour|fill|stroke|opacity|size|shape|linetype|linewidth|width|height|label|family|fontface|hjust|vjust|panel|row|column)\\b" + "match": "\\b(x|y|xmin|xmax|ymin|ymax|xend|yend|weight|color|colour|fill|stroke|opacity|size|shape|linetype|linewidth|width|height|label|typeface|fontweight|italic|hjust|vjust|panel|row|column)\\b" } ] }, diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 590a5d41..5ed2e65b 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -687,8 +687,12 @@ fn add_discrete_columns_to_partition_by( .map(|c| c.name.as_str()) .collect(); - // Get aesthetics consumed by stat transforms (if any) + // Build set of excluded aesthetics that should not trigger auto-grouping: + // - Stat-consumed aesthetics (transformed, not grouped) + // - 'label' aesthetic (text content to display, not grouping categories) let consumed_aesthetics = 
layer.geom.stat_consumed_aesthetics(); + let mut excluded_aesthetics: HashSet<&str> = consumed_aesthetics.iter().copied().collect(); + excluded_aesthetics.insert("label"); for (aesthetic, value) in &layer.mappings.aesthetics { // Skip positional aesthetics - these should not trigger auto-grouping. @@ -698,8 +702,8 @@ fn add_discrete_columns_to_partition_by( continue; } - // Skip stat-consumed aesthetics (they're transformed, not grouped) - if consumed_aesthetics.contains(&aesthetic.as_str()) { + // Skip excluded aesthetics (stat-consumed or label) + if excluded_aesthetics.contains(aesthetic.as_str()) { continue; } diff --git a/src/format.rs b/src/format.rs index 224ace2a..32a1d7ed 100644 --- a/src/format.rs +++ b/src/format.rs @@ -179,29 +179,105 @@ pub fn apply_label_template( } let key = elem.to_key_string(); - let break_val = key.clone(); // Only apply template if no explicit mapping exists - result.entry(key).or_insert_with(|| { - let label = if placeholders.is_empty() { - // No placeholders - use template as literal string - template.to_string() - } else { - // Replace each placeholder with its transformed value - // Process in reverse order to preserve string indices - let mut label = template.to_string(); - for parsed in placeholders.iter().rev() { - let transformed = apply_transformation(&break_val, &parsed.placeholder); - label = label.replace(&parsed.match_text, &transformed); - } - label - }; - Some(label) + result.entry(key.clone()).or_insert_with(|| { + // Use shared format_value helper + Some(format_value(&key, template, &placeholders)) }); } result } +/// Apply label formatting template to a DataFrame column. +/// +/// Returns a new DataFrame with the specified column formatted according to the template. 
+/// +/// # Arguments +/// * `df` - DataFrame containing the column to format +/// * `column_name` - Name of the column to format +/// * `template` - Template string with placeholders (e.g., "{:Title}", "{:num %.2f}") +/// +/// # Returns +/// New DataFrame with formatted column +/// +/// # Example +/// ```ignore +/// let formatted_df = format_dataframe_column(&df, "_aesthetic_label", "Region: {:Title}")?; +/// ``` +pub fn format_dataframe_column( + df: &polars::prelude::DataFrame, + column_name: &str, + template: &str, +) -> Result { + use polars::prelude::*; + + // Get the column + let column = df + .column(column_name) + .map_err(|e| format!("Column '{}' not found: {}", column_name, e))?; + + // Step 1: Convert entire column to strings + let string_values: Vec> = if let Ok(str_col) = column.str() { + // String column (includes temporal data auto-converted to ISO format) + str_col + .into_iter() + .map(|opt| opt.map(|s| s.to_string())) + .collect() + } else if let Ok(num_col) = column.cast(&DataType::Float64) { + // Numeric column - use shared format_number helper for clean integer formatting + use crate::plot::format_number; + + let f64_col = num_col + .f64() + .map_err(|e| format!("Failed to cast column to f64: {}", e))?; + + f64_col + .into_iter() + .map(|opt| opt.map(format_number)) + .collect() + } else { + return Err(format!( + "Formatting doesn't support type {:?} in column '{}'. 
Try string or numeric types instead.", + column.dtype(), + column_name + )); + }; + + // Step 2: Apply formatting template to all string values + let placeholders = parse_placeholders(template); + let formatted_values: Vec> = string_values + .into_iter() + .map(|opt| opt.map(|s| format_value(&s, template, &placeholders))) + .collect(); + + let formatted_col = Series::new(column_name.into(), formatted_values); + + // Replace column in DataFrame + let mut new_df = df.clone(); + new_df + .replace(column_name, formatted_col) + .map_err(|e| format!("Failed to replace column: {}", e))?; + + Ok(new_df) +} + +/// Format a single value using template and parsed placeholders +fn format_value(value: &str, template: &str, placeholders: &[ParsedPlaceholder]) -> String { + if placeholders.is_empty() { + // No placeholders - use template as literal string + template.to_string() + } else { + // Replace each placeholder with its transformed value + let mut result = template.to_string(); + for parsed in placeholders.iter().rev() { + let transformed = apply_transformation(value, &parsed.placeholder); + result = result.replace(&parsed.match_text, &transformed); + } + result + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/builder.rs b/src/parser/builder.rs index f3304195..c863202f 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -611,7 +611,6 @@ fn parse_geom_type(text: &str) -> Result { "boxplot" => Ok(Geom::boxplot()), "violin" => Ok(Geom::violin()), "text" => Ok(Geom::text()), - "label" => Ok(Geom::label()), "segment" => Ok(Geom::segment()), "arrow" => Ok(Geom::arrow()), "rule" => Ok(Geom::rule()), diff --git a/src/plot/aesthetic.rs b/src/plot/aesthetic.rs index 71da550f..c0837a73 100644 --- a/src/plot/aesthetic.rs +++ b/src/plot/aesthetic.rs @@ -58,7 +58,7 @@ pub const USER_FACET_AESTHETICS: &[&str] = &["panel", "row", "column"]; /// - Color aesthetics: color, colour, fill, stroke, opacity /// - Size/shape aesthetics: size, shape, linetype, 
linewidth /// - Dimension aesthetics: width, height -/// - Text aesthetics: label, family, fontface, hjust, vjust +/// - Text aesthetics: label, typeface, fontweight, italic, hjust, vjust pub const NON_POSITIONAL: &[&str] = &[ "color", "colour", @@ -72,8 +72,10 @@ pub const NON_POSITIONAL: &[&str] = &[ "width", "height", "label", - "family", - "fontface", + "typeface", + "fontweight", + "italic", + "fontsize", "hjust", "vjust", ]; diff --git a/src/plot/layer/geom/label.rs b/src/plot/layer/geom/label.rs deleted file mode 100644 index d2f41d1c..00000000 --- a/src/plot/layer/geom/label.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Label geom implementation - -use super::{DefaultAesthetics, DefaultParam, DefaultParamValue, GeomTrait, GeomType}; -use crate::plot::types::DefaultAestheticValue; - -/// Label geom - text labels with background -#[derive(Debug, Clone, Copy)] -pub struct Label; - -impl GeomTrait for Label { - fn geom_type(&self) -> GeomType { - GeomType::Label - } - - fn aesthetics(&self) -> DefaultAesthetics { - DefaultAesthetics { - defaults: &[ - ("pos1", DefaultAestheticValue::Required), - ("pos2", DefaultAestheticValue::Required), - ("label", DefaultAestheticValue::Null), - ("fill", DefaultAestheticValue::Null), - ("stroke", DefaultAestheticValue::Null), - ("size", DefaultAestheticValue::Number(11.0)), - ("opacity", DefaultAestheticValue::Number(1.0)), - ("family", DefaultAestheticValue::Null), - ("fontface", DefaultAestheticValue::Null), - ("hjust", DefaultAestheticValue::Null), - ("vjust", DefaultAestheticValue::Null), - ], - } - } - - fn default_params(&self) -> &'static [DefaultParam] { - &[DefaultParam { - name: "position", - default: DefaultParamValue::String("identity"), - }] - } -} - -impl std::fmt::Display for Label { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "label") - } -} diff --git a/src/plot/layer/geom/mod.rs b/src/plot/layer/geom/mod.rs index 69aeffcd..28e86eff 100644 --- a/src/plot/layer/geom/mod.rs +++ 
b/src/plot/layer/geom/mod.rs @@ -35,7 +35,6 @@ mod boxplot; mod density; mod errorbar; mod histogram; -mod label; mod line; mod linear; mod path; @@ -60,7 +59,6 @@ pub use boxplot::Boxplot; pub use density::Density; pub use errorbar::ErrorBar; pub use histogram::Histogram; -pub use label::Label; pub use line::Line; pub use linear::Linear; pub use path::Path; @@ -95,7 +93,6 @@ pub enum GeomType { Boxplot, Violin, Text, - Label, Segment, Arrow, Rule, @@ -120,7 +117,6 @@ impl std::fmt::Display for GeomType { GeomType::Boxplot => "boxplot", GeomType::Violin => "violin", GeomType::Text => "text", - GeomType::Label => "label", GeomType::Segment => "segment", GeomType::Arrow => "arrow", GeomType::Rule => "rule", @@ -311,11 +307,6 @@ impl Geom { Self(Arc::new(Text)) } - /// Create a Label geom - pub fn label() -> Self { - Self(Arc::new(Label)) - } - /// Create a Segment geom pub fn segment() -> Self { Self(Arc::new(Segment)) @@ -358,7 +349,6 @@ impl Geom { GeomType::Boxplot => Self::boxplot(), GeomType::Violin => Self::violin(), GeomType::Text => Self::text(), - GeomType::Label => Self::label(), GeomType::Segment => Self::segment(), GeomType::Arrow => Self::arrow(), GeomType::Rule => Self::rule(), diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index eff8d887..f7bfcb46 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -1,7 +1,10 @@ //! 
Text geom implementation -use super::{DefaultAesthetics, DefaultParam, DefaultParamValue, GeomTrait, GeomType}; +use super::{DefaultAesthetics, GeomTrait, GeomType}; use crate::plot::types::DefaultAestheticValue; +use crate::plot::{DefaultParam, DefaultParamValue, ParameterValue}; +use crate::{naming, DataFrame, Result}; +use std::collections::HashMap; /// Text geom - text labels at positions #[derive(Debug, Clone, Copy)] @@ -17,23 +20,53 @@ impl GeomTrait for Text { defaults: &[ ("pos1", DefaultAestheticValue::Required), ("pos2", DefaultAestheticValue::Required), - ("label", DefaultAestheticValue::Null), + ("label", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::Null), - ("size", DefaultAestheticValue::Number(11.0)), + ("fill", DefaultAestheticValue::String("black")), ("opacity", DefaultAestheticValue::Number(1.0)), - ("family", DefaultAestheticValue::Null), - ("fontface", DefaultAestheticValue::Null), - ("hjust", DefaultAestheticValue::Null), - ("vjust", DefaultAestheticValue::Null), + ("typeface", DefaultAestheticValue::Null), + ("fontsize", DefaultAestheticValue::Number(11.0)), + ("fontweight", DefaultAestheticValue::String("normal")), // Accepts: CSS keywords or numeric values + ("italic", DefaultAestheticValue::Boolean(false)), + ("hjust", DefaultAestheticValue::Number(0.5)), + ("vjust", DefaultAestheticValue::Number(0.5)), + ("rotation", DefaultAestheticValue::Number(0.0)), ], } } fn default_params(&self) -> &'static [DefaultParam] { - &[DefaultParam { - name: "position", - default: DefaultParamValue::String("identity"), - }] + &[ + DefaultParam { + name: "offset", + default: DefaultParamValue::Null, + }, + DefaultParam { + name: "format", + default: DefaultParamValue::Null, + }, + DefaultParam { + name: "position", + default: DefaultParamValue::String("identity"), + }, + ] + } + + fn post_process( + &self, + df: DataFrame, + parameters: &HashMap, + ) -> Result { + // Check if format parameter is specified + let format_template = match 
parameters.get("format") { + Some(ParameterValue::String(template)) => template, + _ => return Ok(df), // No formatting, return original + }; + + // Use format.rs helper to do the formatting + let label_col_name = naming::aesthetic_column("label"); + crate::format::format_dataframe_column(&df, &label_col_name, format_template) + .map_err(crate::GgsqlError::ValidationError) } } diff --git a/src/plot/main.rs b/src/plot/main.rs index 08eb3d53..8700562f 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -506,8 +506,8 @@ mod tests { // Text geom let text = Geom::text().aesthetics(); assert!(text.is_supported("label")); - assert!(text.is_supported("family")); - assert_eq!(text.required(), &["pos1", "pos2"]); + assert!(text.is_supported("typeface")); + assert_eq!(text.required(), &["pos1", "pos2", "label"]); // Statistical geoms only require pos1 assert_eq!(Geom::histogram().aesthetics().required(), &["pos1"]); @@ -759,8 +759,8 @@ mod tests { let labels = spec.labels.as_ref().unwrap(); assert_eq!(labels.labels.get("pos1"), Some(&"X Axis".to_string())); assert_eq!(labels.labels.get("pos2"), Some(&"Y Axis".to_string())); - assert!(labels.labels.get("x").is_none()); - assert!(labels.labels.get("y").is_none()); + assert!(!labels.labels.contains_key("x")); + assert!(!labels.labels.contains_key("y")); } #[test] diff --git a/src/plot/scale/scale_type/continuous.rs b/src/plot/scale/scale_type/continuous.rs index 6ccf750f..e769636d 100644 --- a/src/plot/scale/scale_type/continuous.rs +++ b/src/plot/scale/scale_type/continuous.rs @@ -141,6 +141,10 @@ impl ScaleTypeTrait for Continuous { ArrayElement::Number(1.0), ArrayElement::Number(6.0), ])), + "fontsize" => Ok(Some(vec![ + ArrayElement::Number(8.0), + ArrayElement::Number(20.0), + ])), "opacity" => Ok(Some(vec![ ArrayElement::Number(0.1), ArrayElement::Number(1.0), diff --git a/src/plot/types.rs b/src/plot/types.rs index 99a0a34e..19a21778 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -387,7 +387,7 @@ fn 
time_to_iso_string(nanos: i64) -> String { } /// Format number for display (remove trailing zeros for integers) -fn format_number(n: f64) -> String { +pub fn format_number(n: f64) -> String { if n.fract() == 0.0 { format!("{:.0}", n) } else { diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 5f3e58ec..c35dc05f 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -687,6 +687,10 @@ mod tests { } #[test] + #[cfg_attr( + target_os = "windows", + ignore = "DuckDB crashes on Windows with invalid SQL" + )] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let result = reader.execute_sql("INVALID SQL SYNTAX"); diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index 2b82639a..f98043ef 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -553,7 +553,7 @@ fn convert_range_element(elem: &crate::plot::ArrayElement, aesthetic: &str) -> V // Size: convert radius (points) to area (pixels²) "size" => json!(n * n * POINTS_TO_AREA), // Linewidth: convert points to pixels - "linewidth" => json!(n * POINTS_TO_PIXELS), + "linewidth" | "fontsize" => json!(n * POINTS_TO_PIXELS), // Other aesthetics: pass through unchanged _ => json!(n), } @@ -933,7 +933,7 @@ fn build_literal_encoding(aesthetic: &str, lit: &ParameterValue) -> Result json!(n * n * POINTS_TO_AREA), // Linewidth: points → pixels - "linewidth" => json!(n * POINTS_TO_PIXELS), + "linewidth" | "fontsize" => json!(n * POINTS_TO_PIXELS), _ => json!(n), } } @@ -971,8 +971,10 @@ pub(super) fn map_aesthetic_name( "linewidth" => "strokeWidth".to_string(), // Text aesthetics "label" => "text".to_string(), + "fontsize" => "size".to_string(), // All other aesthetics pass through directly // (fill and stroke map to Vega-Lite's separate fill/stroke channels) + // typeface/fontweight/italic/rotation are parsed explicitly _ => aesthetic.to_string(), } } diff --git a/src/writer/vegalite/layer.rs 
b/src/writer/vegalite/layer.rs index 6bf96347..a96a4895 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -9,7 +9,7 @@ use crate::plot::layer::geom::GeomType; use crate::plot::layer::is_transposed; -use crate::plot::ParameterValue; +use crate::plot::{ArrayElement, ParameterValue}; use crate::writer::vegalite::POINTS_TO_PIXELS; use crate::{naming, AestheticValue, DataFrame, Geom, GgsqlError, Layer, Result}; use polars::prelude::ChunkCompareEq; @@ -40,7 +40,6 @@ pub fn geom_to_mark(geom: &Geom) -> Value { GeomType::Violin => "line", GeomType::Boxplot => "boxplot", GeomType::Text => "text", - GeomType::Label => "text", GeomType::Segment => "rule", GeomType::Rule => "rule", GeomType::Linear => "rule", @@ -190,6 +189,7 @@ pub trait GeomRenderer: Send + Sync { fn prepare_data( &self, df: &DataFrame, + _layer: &Layer, _data_key: &str, binned_columns: &HashMap>, ) -> Result { @@ -430,6 +430,7 @@ impl GeomRenderer for LinearRenderer { fn prepare_data( &self, df: &DataFrame, + _layer: &Layer, _data_key: &str, _binned_columns: &HashMap>, ) -> Result { @@ -549,6 +550,563 @@ impl GeomRenderer for LinearRenderer { } } +// ============================================================================= +// Text Renderer +// ============================================================================= + +/// Renderer for text geom - handles font properties via data splitting +pub struct TextRenderer; + +impl TextRenderer { + /// Analyze DataFrame columns to build font property runs using run-length encoding. 
+ /// Returns: + /// - DataFrame where each row represents a run's font properties (family, fontweight, italic, hjust, vjust, angle) + /// - Vec of run lengths corresponding to each row + fn build_font_rle(df: &DataFrame) -> Result<(DataFrame, Vec)> { + use polars::prelude::*; + + let nrows = df.height(); + + if nrows == 0 { + // Return empty DataFrame and empty run lengths + return Ok((DataFrame::default(), Vec::new())); + } + + // Build boolean mask showing where any font property changes + let mut changed = BooleanChunked::full("changed".into(), false, nrows); + let mut font_columns: HashMap<&str, &polars::prelude::Column> = HashMap::new(); + + for aesthetic in [ + "typeface", + "fontweight", + "italic", + "hjust", + "vjust", + "rotation", + ] { + if let Ok(col) = df.column(&naming::aesthetic_column(aesthetic)) { + let col_changed = col.not_equal(&col.shift(1)).map_err(|e| { + GgsqlError::InternalError(format!("Failed to compare column: {}", e)) + })?; + changed = &changed | &col_changed; + font_columns.insert(aesthetic, col); + } + } + + // Extract change indices (where mask is true) + // shift() creates nulls at position 0, which we treat as a change point + let mut change_indices: Vec = Vec::new(); + for (i, val) in changed.iter().enumerate() { + if val == Some(true) || val.is_none() { + // Treat null (from shift) or true as change point + change_indices.push(i); + } + } + + // First row is always a change point (shift comparison is null) + if !change_indices.is_empty() && change_indices[0] != 0 { + change_indices.insert(0, 0); + } else if change_indices.is_empty() { + change_indices.push(0); + } + + // Calculate run lengths + let run_lengths: Vec = change_indices + .iter() + .enumerate() + .map(|(i, &start)| { + let end = change_indices.get(i + 1).copied().unwrap_or(nrows); + end - start + }) + .collect(); + + // Extract rows at change indices (only font columns) + let indices_ca = UInt32Chunked::from_vec( + "indices".into(), + change_indices.iter().map(|&i| 
i as u32).collect(), + ); + let font_aesthetics = [ + "typeface", + "fontweight", + "italic", + "hjust", + "vjust", + "rotation", + ]; + + let mut result_cols = Vec::new(); + for aesthetic in font_aesthetics { + if let Some(col) = font_columns.get(aesthetic) { + let taken = col.take(&indices_ca).map_err(|e| { + GgsqlError::InternalError(format!( + "Failed to take indices from {}: {}", + aesthetic, e + )) + })?; + result_cols.push(taken); + } + } + + // Create result DataFrame (only font properties, no run_length column) + let result_df = DataFrame::new(result_cols).map_err(|e| { + GgsqlError::InternalError(format!("Failed to create run DataFrame: {}", e)) + })?; + + Ok((result_df, run_lengths)) + } + + /// Convert typeface to Vega-Lite font value + /// Prefers literal over column value + fn convert_typeface( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First select which value to use (prefer literal) + let value = if let Some(ParameterValue::String(s)) = literal { + s.as_str() + } else { + column_value? + }; + + // Then apply conversion + if !value.is_empty() { + Some(json!(value)) + } else { + None + } + } + + /// Convert fontweight to Vega-Lite fontWeight value + /// Prefers literal over column value + /// Accepts all CSS font-weight keywords and numeric values: + /// - Keywords: 'thin', 'hairline', 'extra-light', 'ultra-light', 'light', + /// 'normal', 'regular', 'lighter', 'medium', 'semi-bold', 'demi-bold', + /// 'bold', 'bolder', 'extra-bold', 'ultra-bold', 'black', 'heavy' + /// - Numeric values: any number + /// - Numeric strings from columns: '100', '400', '700', etc. 
+ /// + /// Always outputs 'normal' or 'bold' for Vega-Lite compatibility: + /// - < 500 → 'normal' (thin, light, normal, regular, lighter) + /// - >= 500 → 'bold' (medium, semi-bold, bold, bolder, extra-bold, black, heavy) + fn convert_fontweight( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First select which value to use (prefer literal) + let numeric = match literal { + Some(ParameterValue::String(s)) => { + // String literal: keyword or numeric string + Self::parse_fontweight_to_numeric(s.as_str()) + } + Some(ParameterValue::Number(n)) => { + // Numeric literal: use directly + Some(*n) + } + _ => { + // Column value: try to parse + column_value.and_then(Self::parse_fontweight_to_numeric) + } + }?; + + // Apply >= 500 rule to determine bold/normal + let is_bold = numeric >= 500.0; + Some(json!(if is_bold { "bold" } else { "normal" })) + } + + /// Parse fontweight value from string to numeric value + fn parse_fontweight_to_numeric(value: &str) -> Option { + // Try parsing as number first + if let Ok(num) = value.parse::() { + return Some(num); + } + + // Map CSS font-weight keywords to numeric values + // Normalize: convert to lowercase and remove hyphens for flexible matching + let normalized = value.to_lowercase().replace("-", ""); + match normalized.as_str() { + "thin" | "hairline" => Some(100.0), + "extralight" | "ultralight" => Some(200.0), + "light" => Some(300.0), + "normal" | "regular" | "lighter" => Some(400.0), + "medium" => Some(500.0), + "semibold" | "demibold" => Some(600.0), + "bold" | "bolder" => Some(700.0), + "extrabold" | "ultrabold" => Some(800.0), + "black" | "heavy" => Some(900.0), + _ => None, + } + } + + /// Convert italic to Vega-Lite fontStyle value + /// Prefers literal over column value + /// Accepts boolean literals or string column values ('true', 'false', '1', '0') + fn convert_italic( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First select which 
value to use (prefer literal) + let value = if let Some(ParameterValue::Boolean(b)) = literal { + *b + } else if let Some(s) = column_value { + // Parse string to boolean + match s.to_lowercase().as_str() { + "true" | "1" => true, + "false" | "0" => false, + _ => return None, + } + } else { + return None; + }; + + // Convert boolean to fontStyle + let style = if value { "italic" } else { "normal" }; + Some(json!(style)) + } + + /// Convert hjust to Vega-Lite align value + /// Prefers literal over column value + fn convert_hjust( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First extract which value to use (prefer literal) + let value_str = match literal { + Some(ParameterValue::String(s)) => s.to_string(), + Some(ParameterValue::Number(n)) => n.to_string(), + _ => column_value?.to_string(), + }; + + // Then apply conversion inline + let align = match value_str.parse::() { + Ok(v) if v <= 0.25 => "left", + Ok(v) if v >= 0.75 => "right", + _ => match value_str.as_str() { + "left" => "left", + "right" => "right", + _ => "center", + }, + }; + + Some(json!(align)) + } + + /// Convert vjust to Vega-Lite baseline value + /// Prefers literal over column value + fn convert_vjust( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First extract which value to use (prefer literal) + let value_str = match literal { + Some(ParameterValue::String(s)) => s.to_string(), + Some(ParameterValue::Number(n)) => n.to_string(), + _ => column_value?.to_string(), + }; + + // Then apply conversion inline + let baseline = match value_str.parse::() { + Ok(v) if v <= 0.25 => "bottom", + Ok(v) if v >= 0.75 => "top", + _ => match value_str.as_str() { + "top" => "top", + "bottom" => "bottom", + _ => "middle", + }, + }; + + Some(json!(baseline)) + } + + /// Convert rotation to Vega-Lite angle value (degrees) + /// Prefers literal over column value + /// Normalizes angles to [0, 360) range + fn convert_rotation( + 
literal: Option<&ParameterValue>, + column_value: Option, + ) -> Option { + // First select which value to use (prefer literal) + let value = if let Some(ParameterValue::Number(n)) = literal { + *n + } else { + column_value? + }; + + // Then apply conversion inline + let normalized = value % 360.0; + let angle = if normalized < 0.0 { + normalized + 360.0 + } else { + normalized + }; + + Some(json!(angle)) + } + + /// Apply font properties to mark object from DataFrame row and layer literals + /// Uses literals from layer parameters if present, otherwise uses DataFrame column values + fn apply_font_properties( + mark_obj: &mut Map, + df: &DataFrame, + row_idx: usize, + layer: &Layer, + ) -> Result<()> { + // Helper to extract string column values using aesthetic column naming + let get_str = |aesthetic: &str| -> Option { + let col_name = naming::aesthetic_column(aesthetic); + df.column(&col_name) + .ok() + .and_then(|col| col.str().ok()) + .and_then(|ca| ca.get(row_idx)) + .map(|s| s.to_string()) + }; + + // Helper to extract numeric column values (for angle) + let get_f64 = |aesthetic: &str| -> Option { + use polars::prelude::*; + let col_name = naming::aesthetic_column(aesthetic); + let col = df.column(&col_name).ok()?; + + // Try as string first (for string-encoded numbers) + if let Ok(ca) = col.str() { + return ca.get(row_idx).and_then(|s| s.parse::().ok()); + } + + // Try as numeric types directly + if let Ok(casted) = col.cast(&DataType::Float64) { + if let Ok(ca) = casted.f64() { + return ca.get(row_idx); + } + } + + None + }; + + // Convert and apply font properties + if let Some(typeface_val) = Self::convert_typeface( + layer.get_literal("typeface"), + get_str("typeface").as_deref(), + ) { + mark_obj.insert("font".to_string(), typeface_val); + } + + if let Some(weight) = Self::convert_fontweight( + layer.get_literal("fontweight"), + get_str("fontweight").as_deref(), + ) { + mark_obj.insert("fontWeight".to_string(), weight); + } + + if let Some(style) = + 
Self::convert_italic(layer.get_literal("italic"), get_str("italic").as_deref()) + { + mark_obj.insert("fontStyle".to_string(), style); + } + + if let Some(hjust_val) = + Self::convert_hjust(layer.get_literal("hjust"), get_str("hjust").as_deref()) + { + mark_obj.insert("align".to_string(), hjust_val); + } + + if let Some(vjust_val) = + Self::convert_vjust(layer.get_literal("vjust"), get_str("vjust").as_deref()) + { + mark_obj.insert("baseline".to_string(), vjust_val); + } + + if let Some(angle_val) = + Self::convert_rotation(layer.get_literal("rotation"), get_f64("rotation")) + { + mark_obj.insert("angle".to_string(), angle_val); + } + + Ok(()) + } + + /// Build transform with source filter + fn build_transform_with_filter(prototype: &Value, source_key: &str) -> Vec { + let source_filter = json!({ + "filter": { + "field": naming::SOURCE_COLUMN, + "equal": source_key + } + }); + + let existing_transforms = prototype + .get("transform") + .and_then(|t| t.as_array()) + .cloned() + .unwrap_or_default(); + + let mut new_transforms = vec![source_filter]; + new_transforms.extend(existing_transforms); + new_transforms + } + + /// Finalize layers as nested layer with shared encoding (works for single or multiple runs) + fn finalize_nested_layers( + &self, + prototype: Value, + data_key: &str, + font_runs_df: &DataFrame, + run_lengths: &[usize], + layer: &Layer, + ) -> Result> { + // Extract shared encoding from prototype + let shared_encoding = prototype.get("encoding").cloned(); + + // Build base mark object with fixed parameters + let mut base_mark = json!({"type": "text"}); + if let Some(mark_map) = base_mark.as_object_mut() { + // Extract offset parameter (offset => [x, y] or offset => n) + match layer.parameters.get("offset") { + Some(ParameterValue::Array(offset_array)) if offset_array.len() == 2 => { + // Array case: [x, y] + if let ArrayElement::Number(x_offset) = offset_array[0] { + mark_map.insert("xOffset".to_string(), json!(x_offset * POINTS_TO_PIXELS)); + } + if 
let ArrayElement::Number(y_offset) = offset_array[1] { + mark_map.insert("yOffset".to_string(), json!(-y_offset * POINTS_TO_PIXELS)); + } + } + Some(ParameterValue::Number(offset)) => { + // Single number case: applies to both x and y + mark_map.insert("xOffset".to_string(), json!(offset * POINTS_TO_PIXELS)); + mark_map.insert("yOffset".to_string(), json!(-offset * POINTS_TO_PIXELS)); + } + _ => {} + } + } + + // Build individual layers without encoding (mark + transform only) + // Use run_lengths to get number of runs (works even when no font columns exist) + let nruns = run_lengths.len(); + let mut nested_layers: Vec = Vec::with_capacity(nruns); + + for run_idx in 0..nruns { + let suffix = format!("_font_{}", run_idx); + let source_key = format!("{}{}", data_key, suffix); + + // Clone base mark and apply font-specific properties + let mut mark_obj = base_mark.clone(); + if let Some(mark_map) = mark_obj.as_object_mut() { + Self::apply_font_properties(mark_map, font_runs_df, run_idx, layer)?; + } + + // Create layer with mark and transform (no encoding) + nested_layers.push(json!({ + "mark": mark_obj, + "transform": Self::build_transform_with_filter(&prototype, &source_key) + })); + } + + // Wrap in parent spec with shared encoding + let mut parent_spec = json!({"layer": nested_layers}); + + if let Some(encoding) = shared_encoding { + parent_spec["encoding"] = encoding; + } + + Ok(vec![parent_spec]) + } +} + +impl GeomRenderer for TextRenderer { + fn prepare_data( + &self, + df: &DataFrame, + _layer: &Layer, + _data_key: &str, + binned_columns: &HashMap>, + ) -> Result { + // Note: Label formatting is already applied via Text::post_process() during execution + + // Analyze font columns to get RLE runs + let (font_runs_df, run_lengths) = Self::build_font_rle(df)?; + + // Split data by font runs, tracking cumulative position + let mut components: HashMap> = HashMap::new(); + let mut position = 0; + + for (run_idx, &length) in run_lengths.iter().enumerate() { + let 
suffix = format!("_font_{}", run_idx); + + // Slice the contiguous run from the DataFrame (more efficient than boolean masking) + let sliced = df.slice(position as i64, length); + + let values = if binned_columns.is_empty() { + dataframe_to_values(&sliced)? + } else { + dataframe_to_values_with_bins(&sliced, binned_columns)? + }; + + components.insert(suffix, values); + position += length; + } + + Ok(PreparedData::Composite { + components, + metadata: Box::new((font_runs_df, run_lengths)), + }) + } + + fn modify_encoding( + &self, + encoding: &mut Map, + _layer: &Layer, + _context: &RenderContext, + ) -> Result<()> { + // Remove font aesthetics from encoding - they only work as mark properties + for &aesthetic in &[ + "typeface", + "fontweight", + "italic", + "hjust", + "vjust", + "rotation", + ] { + encoding.remove(aesthetic); + } + + // Suppress legend and scale for text encoding + if let Some(text_encoding) = encoding.get_mut("text") { + if let Some(text_obj) = text_encoding.as_object_mut() { + text_obj.insert("legend".to_string(), Value::Null); + text_obj.insert("scale".to_string(), Value::Null); + } + } + + Ok(()) + } + + fn needs_source_filter(&self) -> bool { + // TextRenderer handles source filtering in finalize() + false + } + + fn finalize( + &self, + prototype: Value, + layer: &Layer, + data_key: &str, + prepared: &PreparedData, + ) -> Result> { + let PreparedData::Composite { metadata, .. 
} = prepared else { + return Err(GgsqlError::InternalError( + "TextRenderer::finalize called with non-composite data".to_string(), + )); + }; + + // Downcast metadata to font runs + let (font_runs_df, run_lengths) = metadata + .downcast_ref::<(DataFrame, Vec)>() + .ok_or_else(|| GgsqlError::InternalError("Failed to downcast font runs".to_string()))?; + + // Generate nested layers from font runs (works for single or multiple runs) + self.finalize_nested_layers(prototype, data_key, font_runs_df, run_lengths, layer) + } +} + // ============================================================================= // Ribbon Renderer // ============================================================================= @@ -1197,6 +1755,7 @@ impl GeomRenderer for BoxplotRenderer { fn prepare_data( &self, df: &DataFrame, + _layer: &Layer, _data_key: &str, binned_columns: &HashMap>, ) -> Result { @@ -1248,6 +1807,7 @@ pub fn get_renderer(geom: &Geom) -> Box { GeomType::Polygon => Box::new(PolygonRenderer), GeomType::Boxplot => Box::new(BoxplotRenderer), GeomType::Violin => Box::new(ViolinRenderer), + GeomType::Text => Box::new(TextRenderer), GeomType::Segment => Box::new(SegmentRenderer), GeomType::Linear => Box::new(LinearRenderer), GeomType::ErrorBar => Box::new(ErrorBarRenderer), @@ -1370,6 +1930,920 @@ mod tests { ); } + #[test] + fn test_text_constant_font() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); + + // Create DataFrame where all rows have the same font + let df = df! 
{ + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("typeface").as_str() => &["Arial", "Arial", "Arial"], + } + .unwrap(); + + // Prepare data - should result in single layer with _font_0 component key + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); + + match prepared { + PreparedData::Composite { components, .. } => { + // Should have single component with _font_0 key + assert_eq!(components.len(), 1); + assert!(components.contains_key("_font_0")); + } + _ => panic!("Expected Composite"), + } + } + + #[test] + fn test_text_varying_font() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); + + // Create DataFrame with different fonts per row + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("typeface").as_str() => &["Arial", "Courier", "Times"], + } + .unwrap(); + + // Prepare data - should result in multiple layers + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); + + match prepared { + PreparedData::Composite { components, .. 
} => { + // Should have 3 components (one per unique font) with suffix keys + assert_eq!(components.len(), 3); + assert!(components.contains_key("_font_0")); + assert!(components.contains_key("_font_1")); + assert!(components.contains_key("_font_2")); + } + _ => panic!("Expected Composite"), + } + } + + #[test] + fn test_text_nested_layers_structure() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); + + // Create DataFrame with different fonts + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("typeface").as_str() => &["Arial", "Courier", "Arial"], + naming::aesthetic_column("fontweight").as_str() => &["bold", "normal", "bold"], + naming::aesthetic_column("italic").as_str() => &["false", "true", "false"], + } + .unwrap(); + + // Prepare data + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); + + // Get the components + let components = match &prepared { + PreparedData::Composite { components, .. 
} => components, + _ => panic!("Expected Composite"), + }; + + // Should have 3 components due to non-contiguous indices + // (Arial+bold+not-italic at index 0, Courier+normal+italic at index 1, Arial+bold+not-italic at index 2) + assert_eq!(components.len(), 3); + + // Build prototype spec + let prototype = json!({ + "mark": {"type": "text"}, + "encoding": { + "x": {"field": naming::aesthetic_column("x"), "type": "quantitative"}, + "y": {"field": naming::aesthetic_column("y"), "type": "quantitative"}, + "text": {"field": naming::aesthetic_column("label"), "type": "nominal"} + } + }); + + // Create a dummy layer + let layer = crate::plot::Layer::new(crate::plot::Geom::text()); + + // Call finalize to get layers + let layers = renderer + .finalize(prototype.clone(), &layer, "test", &prepared) + .unwrap(); + + // For multiple font groups, should return single parent spec with nested layers + assert_eq!(layers.len(), 1); + + let parent_spec = &layers[0]; + + // Parent should have "layer" array + assert!(parent_spec.get("layer").is_some()); + let nested_layers = parent_spec["layer"].as_array().unwrap(); + + // Should have 3 nested layers (one per component) + assert_eq!(nested_layers.len(), 3); + + // Parent should have shared encoding + assert!(parent_spec.get("encoding").is_some()); + + // Each nested layer should have mark and transform, but not encoding + for nested_layer in nested_layers { + assert!(nested_layer.get("mark").is_some()); + assert!(nested_layer.get("transform").is_some()); + assert!(nested_layer.get("encoding").is_none()); + + // Mark should have font properties + let mark = nested_layer["mark"].as_object().unwrap(); + assert!(mark.contains_key("fontWeight")); + assert!(mark.contains_key("fontStyle")); + } + } + + #[test] + fn test_text_varying_angle() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); + + // Create DataFrame with different angles + let df = df! 
{ + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("rotation").as_str() => &["0", "45", "90"], + } + .unwrap(); + + // Prepare data - should result in multiple layers (one per unique angle) + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); + + match &prepared { + PreparedData::Composite { components, .. } => { + // Should have 3 components (one per unique angle) + assert_eq!(components.len(), 3); + assert!(components.contains_key("_font_0")); + assert!(components.contains_key("_font_1")); + assert!(components.contains_key("_font_2")); + } + _ => panic!("Expected Composite"), + } + + // Build prototype spec + let prototype = json!({ + "mark": {"type": "text"}, + "encoding": { + "x": {"field": naming::aesthetic_column("x"), "type": "quantitative"}, + "y": {"field": naming::aesthetic_column("y"), "type": "quantitative"}, + "text": {"field": naming::aesthetic_column("label"), "type": "nominal"} + } + }); + + // Create a dummy layer + let layer = crate::plot::Layer::new(crate::plot::Geom::text()); + + // Call finalize to get layers + let layers = renderer + .finalize(prototype.clone(), &layer, "test", &prepared) + .unwrap(); + + // Should return single parent spec with nested layers + assert_eq!(layers.len(), 1); + + let parent_spec = &layers[0]; + let nested_layers = parent_spec["layer"].as_array().unwrap(); + + // Should have 3 nested layers (one per unique angle) + assert_eq!(nested_layers.len(), 3); + + // Each layer should have angle property in mark + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + assert!(mark.contains_key("angle")); // Vega-Lite uses "angle" property name + } + } + + #[test] + fn test_text_varying_angle_numeric() { + use crate::naming; + use polars::prelude::*; + + let renderer = 
TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); + + // Create DataFrame with numeric angle column (matching actual query) + let df = df! { + naming::aesthetic_column("x").as_str() => &[1, 2, 3], + naming::aesthetic_column("y").as_str() => &[1, 2, 3], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("rotation").as_str() => &[0i32, 180i32, 0i32], // integer column + } + .unwrap(); + + // Prepare data - should result in multiple layers (one per unique angle) + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); + + match &prepared { + PreparedData::Composite { components, .. } => { + // Should have 3 components: angle 0 at row 0, angle 180 at row 1, angle 0 at row 2 + // Due to non-contiguous indices, rows 0 and 2 should be in separate components + eprintln!("Number of components: {}", components.len()); + eprintln!( + "Component keys: {:?}", + components.keys().collect::>() + ); + assert_eq!(components.len(), 3); + } + _ => panic!("Expected Composite"), + } + } + + #[test] + fn test_text_angle_integration() { + use crate::execute; + use crate::naming; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: Full pipeline from SQL query to Vega-Lite with angle aesthetic + // This tests that angle values properly create separate layers with angle mark properties + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with text geom and varying angles + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label, + CASE + WHEN n = 0 THEN 0 + WHEN n = 1 THEN 45 + WHEN n = 2 THEN 90 + ELSE 0 + END as rot + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label, rot AS rotation + DRAW text + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + 
assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer should create nested layers structure + assert!( + vl_spec["layer"].is_array(), + "Should have top-level layer array" + ); + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1, "Should have one parent text layer"); + + // Parent layer should have shared encoding and nested layers + let parent_layer = &top_layers[0]; + assert!( + parent_layer["encoding"].is_object(), + "Parent layer should have shared encoding" + ); + assert!( + parent_layer["layer"].is_array(), + "Parent layer should have nested layers" + ); + + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // Should have multiple nested layers (one per unique angle value) + // We have angles: 0, 45, 90, 0 -> but non-contiguous 0s split into separate layers + assert!( + nested_layers.len() >= 3, + "Should have at least 3 nested layers for different angles, got {}", + nested_layers.len() + ); + + // Each nested layer should have mark with angle property + for (idx, nested_layer) in nested_layers.iter().enumerate() { + let mark = nested_layer["mark"].as_object().unwrap(); + assert!( + mark.contains_key("angle"), // Vega-Lite uses "angle" property name + "Nested layer {} mark should have angle property", + idx + ); + assert_eq!(mark["type"], "text"); + + // Should have source filter transform + assert!(nested_layer["transform"].is_array()); + + // Should NOT have encoding (inherited from parent) + assert!(nested_layer.get("encoding").is_none()); + } + + // Verify angles are present and normalized [0, 360) + let angles: Vec = nested_layers + .iter() + .filter_map(|layer| { + layer["mark"] + .as_object() + .and_then(|m| 
m.get("angle")) + .and_then(|a| a.as_f64()) + }) + .collect(); + + // Should have the three distinct angles: 0, 45, 90 + assert!(angles.contains(&0.0), "Should have 0° angle"); + assert!(angles.contains(&45.0), "Should have 45° angle"); + assert!(angles.contains(&90.0), "Should have 90° angle"); + + // Verify data has angle column + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + assert!(!data_values.is_empty()); + + let angle_col = naming::aesthetic_column("rotation"); + for row in data_values { + assert!( + row[&angle_col].is_number(), + "Data row should have numeric angle: {:?}", + row + ); + } + } + + #[test] + fn test_text_offset_parameters() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: offset parameter should map to xOffset/yOffset + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with offset parameter + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING offset => [5, -10] + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer creates nested layers structure + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1); + + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // All nested layers should have xOffset and yOffset in mark + for nested_layer in nested_layers { + let mark = 
nested_layer["mark"].as_object().unwrap(); + + assert!( + mark.contains_key("xOffset"), + "Mark should have xOffset from offset" + ); + assert_eq!( + mark["xOffset"].as_f64().unwrap(), + 5.0 * POINTS_TO_PIXELS, + "xOffset should be 5 * POINTS_TO_PIXELS" + ); + + assert!( + mark.contains_key("yOffset"), + "Mark should have yOffset from offset" + ); + assert_eq!( + mark["yOffset"].as_f64().unwrap(), + 10.0 * POINTS_TO_PIXELS, + "yOffset should be 10 * POINTS_TO_PIXELS (negated from offset[1] = -10)" + ); + } + } + + #[test] + fn test_text_label_formatting() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: format parameter should transform label values + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with format parameter using Title case transformation + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + CASE + WHEN n = 0 THEN 'north region' + WHEN n = 1 THEN 'south region' + ELSE 'east region' + END as region + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, region AS label + DRAW text SETTING format => 'Region: {:Title}' + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Check that data has formatted labels + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + assert!(!data_values.is_empty()); + + // Verify formatted labels in the data + let label_col = crate::naming::aesthetic_column("label"); + + // Check each row has properly formatted labels + let labels: Vec<&str> = data_values + .iter() + 
.filter_map(|row| row[&label_col].as_str()) + .collect(); + + assert_eq!(labels.len(), 3); + assert!(labels.contains(&"Region: North Region")); + assert!(labels.contains(&"Region: South Region")); + assert!(labels.contains(&"Region: East Region")); + } + + #[test] + fn test_text_label_formatting_numeric() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Test numeric formatting with printf-style format + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + n::FLOAT * 10.5 as value + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, value AS label + DRAW text SETTING format => '${:num %.2f}' + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + let spec = &prepared.specs[0]; + + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + let label_col = crate::naming::aesthetic_column("label"); + + let labels: Vec<&str> = data_values + .iter() + .filter_map(|row| row[&label_col].as_str()) + .collect(); + + // Should have formatted currency values + assert_eq!(labels.len(), 3); + assert!(labels.contains(&"$0.00")); + assert!(labels.contains(&"$10.50")); + assert!(labels.contains(&"$21.00")); + } + + #[test] + fn test_text_setting_fontweight() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: SETTING fontweight => 'bold' should add fontWeight to base mark + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with fontweight in SETTING + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + 
n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING fontweight => 'bold' + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer creates nested layers structure + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1); + + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // All nested layers should have fontWeight: "bold" in mark (from SETTING) + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + + assert!( + mark.contains_key("fontWeight"), + "Mark should have fontWeight from SETTING fontweight" + ); + assert_eq!( + mark["fontWeight"].as_str().unwrap(), + "bold", + "fontWeight should be bold" + ); + } + } + + #[test] + fn test_text_setting_fontweight_numeric() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Test numeric fontweight values (700 should map to 'bold') + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING fontweight => 700 + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + let spec = &prepared.specs[0]; + + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = 
serde_json::from_str(&json_str).unwrap(); + + let top_layers = vl_spec["layer"].as_array().unwrap(); + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // Numeric 700 should map to 'bold' + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + assert_eq!(mark["fontWeight"].as_str().unwrap(), "bold"); + } + } + + #[test] + fn test_text_setting_fontweight_numeric_normal() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Test numeric fontweight values (400 should map to 'normal') + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING fontweight => 400 + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + let spec = &prepared.specs[0]; + + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + let top_layers = vl_spec["layer"].as_array().unwrap(); + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // Numeric 400 should map to 'normal' + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + assert_eq!(mark["fontWeight"].as_str().unwrap(), "normal"); + } + } + + #[test] + fn test_text_setting_fontweight_keywords() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Test 'bolder' keyword (should map to 'bold') + let query = r#" + SELECT 1 as x, 1 as y, 'A' as label + 
VISUALISE x, y, label
+            DRAW text SETTING fontweight => 'bolder'
+        "#;
+
+        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();
+        let spec = &prepared.specs[0];
+
+        let writer = VegaLiteWriter::new();
+        let json_str = writer.write(spec, &prepared.data).unwrap();
+        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
+
+        let top_layers = vl_spec["layer"].as_array().unwrap();
+        let parent_layer = &top_layers[0];
+        let nested_layers = parent_layer["layer"].as_array().unwrap();
+
+        for nested_layer in nested_layers {
+            let mark = nested_layer["mark"].as_object().unwrap();
+            assert_eq!(mark["fontWeight"].as_str().unwrap(), "bold");
+        }
+
+        // Test 'lighter' keyword (should map to 'normal')
+        let query = r#"
+            SELECT 1 as x, 1 as y, 'A' as label
+            VISUALISE x, y, label
+            DRAW text SETTING fontweight => 'lighter'
+        "#;
+
+        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();
+        let spec = &prepared.specs[0];
+
+        let json_str = writer.write(spec, &prepared.data).unwrap();
+        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
+
+        let top_layers = vl_spec["layer"].as_array().unwrap();
+        let parent_layer = &top_layers[0];
+        let nested_layers = parent_layer["layer"].as_array().unwrap();
+
+        for nested_layer in nested_layers {
+            let mark = nested_layer["mark"].as_object().unwrap();
+            assert_eq!(mark["fontWeight"].as_str().unwrap(), "normal");
+        }
+
+        // Test 'semi-bold' keyword (should map to 'bold' since 600 >= 500)
+        let query = r#"
+            SELECT 1 as x, 1 as y, 'A' as label
+            VISUALISE x, y, label
+            DRAW text SETTING fontweight => 'semi-bold'
+        "#;
+
+        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();
+        let spec = &prepared.specs[0];
+
+        let json_str = writer.write(spec, &prepared.data).unwrap();
+        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
+
+        let top_layers = vl_spec["layer"].as_array().unwrap();
+        let parent_layer = &top_layers[0];
+        let nested_layers = parent_layer["layer"].as_array().unwrap();
+
+        for nested_layer in nested_layers {
+            let mark = nested_layer["mark"].as_object().unwrap();
+            assert_eq!(mark["fontWeight"].as_str().unwrap(), "bold");
+        }
+
+        // Test 'light' keyword (should map to 'normal' since 300 < 500)
+        let query = r#"
+            SELECT 1 as x, 1 as y, 'A' as label
+            VISUALISE x, y, label
+            DRAW text SETTING fontweight => 'light'
+        "#;
+
+        let prepared = execute::prepare_data_with_reader(query, &reader).unwrap();
+        let spec = &prepared.specs[0];
+
+        let json_str = writer.write(spec, &prepared.data).unwrap();
+        let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap();
+
+        let top_layers = vl_spec["layer"].as_array().unwrap();
+        let parent_layer = &top_layers[0];
+        let nested_layers = parent_layer["layer"].as_array().unwrap();
+
+        for nested_layer in nested_layers {
+            let mark = nested_layer["mark"].as_object().unwrap();
+            assert_eq!(mark["fontWeight"].as_str().unwrap(), "normal");
+        }
+    }
+
+    #[test]
+    fn test_fontweight_keyword_to_numeric_conversion() {
+        // Test parse_fontweight_to_numeric helper function - all CSS keywords
+
+        // 100 - thin/hairline
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("thin"),
+            Some(100.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("hairline"),
+            Some(100.0)
+        );
+
+        // 200 - extra-light/ultra-light
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("extra-light"),
+            Some(200.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("extralight"),
+            Some(200.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("ultra-light"),
+            Some(200.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("ultralight"),
+            Some(200.0)
+        );
+
+        // 300 - light
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("light"),
+            Some(300.0)
+        );
+
+        // 400 - normal/regular/lighter
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("normal"),
+            Some(400.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("regular"),
+            Some(400.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("lighter"),
+            Some(400.0)
+        );
+
+        // 500 - medium
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("medium"),
+            Some(500.0)
+        );
+
+        // 600 - semi-bold/demi-bold
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("semi-bold"),
+            Some(600.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("semibold"),
+            Some(600.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("demi-bold"),
+            Some(600.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("demibold"),
+            Some(600.0)
+        );
+
+        // 700 - bold/bolder
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("bold"),
+            Some(700.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("bolder"),
+            Some(700.0)
+        );
+
+        // 800 - extra-bold/ultra-bold
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("extra-bold"),
+            Some(800.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("extrabold"),
+            Some(800.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("ultra-bold"),
+            Some(800.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("ultrabold"),
+            Some(800.0)
+        );
+
+        // 900 - black/heavy
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("black"),
+            Some(900.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("heavy"),
+            Some(900.0)
+        );
+
+        // Case insensitive
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("BOLD"),
+            Some(700.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("Normal"),
+            Some(400.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("SEMI-BOLD"),
+            Some(600.0)
+        );
+
+        // Numeric strings pass through
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("100"),
+            Some(100.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("400"),
+            Some(400.0)
+        );
+        assert_eq!(
+            TextRenderer::parse_fontweight_to_numeric("700"),
+            Some(700.0)
+        );
+
+        // Invalid values
+        assert_eq!(TextRenderer::parse_fontweight_to_numeric("invalid"), None);
+        assert_eq!(TextRenderer::parse_fontweight_to_numeric(""), None);
+    }
+
     #[test]
     fn test_violin_mirroring() {
         use crate::naming;
diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs
index 2b0dceca..7935e30e 100644
--- a/src/writer/vegalite/mod.rs
+++ b/src/writer/vegalite/mod.rs
@@ -97,7 +97,7 @@ fn prepare_layer_data(
     let renderer = get_renderer(&layer.geom);
 
     // Prepare data using the renderer (handles both standard and composite cases)
-    let prepared = renderer.prepare_data(df, data_key, binned_columns)?;
+    let prepared = renderer.prepare_data(df, layer, data_key, binned_columns)?;
 
     // Add data to individual datasets based on prepared type
     match &prepared {
@@ -1401,6 +1401,10 @@ mod tests {
             map_aesthetic_name("label", &ctx, CoordKind::Cartesian),
             "text"
         );
+        assert_eq!(
+            map_aesthetic_name("fontsize", &ctx, CoordKind::Cartesian),
+            "size"
+        );
 
         // Test with polar coord kind - internal positional maps to radius/theta
         // regardless of the context's user-facing names
@@ -1523,6 +1527,82 @@ mod tests {
         assert_eq!(vl_spec["layer"][0]["mark"]["clip"], true);
     }
 
+    #[test]
+    fn test_fontsize_linear_scaling() {
+        // `fontsize` must be emitted on Vega-Lite's `size` channel, with a scale range that is
+        // converted linearly from points to pixels (no area conversion).
+        use crate::plot::{ArrayElement, OutputRange, Scale, ScaleType};
+
+        let writer = VegaLiteWriter::new();
+
+        // Create spec with text geom using fontsize aesthetic
+        let mut spec = Plot::new();
+        let layer = Layer::new(Geom::text())
+            .with_aesthetic(
+                "pos1".to_string(),
+                AestheticValue::standard_column("x".to_string()),
+            )
+            .with_aesthetic(
+                "pos2".to_string(),
+                AestheticValue::standard_column("y".to_string()),
+            )
+            .with_aesthetic(
+                "label".to_string(),
+                AestheticValue::standard_column("label".to_string()),
+            )
+            .with_aesthetic(
+                "fontsize".to_string(),
+                AestheticValue::standard_column("value".to_string()),
+            );
+        spec.layers.push(layer);
+
+        // Add fontsize scale with explicit range
+        let mut scale = Scale::new("fontsize");
+        scale.scale_type = Some(ScaleType::continuous());
+        scale.output_range = Some(OutputRange::Array(vec![
+            ArrayElement::Number(10.0),
+            ArrayElement::Number(20.0),
+        ]));
+        spec.scales.push(scale);
+
+        // Create DataFrame
+        let df = df! {
+            "x" => &[1, 2, 3],
+            "y" => &[1, 2, 3],
+            "label" => &["A", "B", "C"],
+            "value" => &[1.0, 2.0, 3.0],
+        }
+        .unwrap();
+
+        // Generate Vega-Lite JSON
+        let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
+        let vl_spec: Value = serde_json::from_str(&json_str).unwrap();
+
+        // Verify fontsize maps to size channel
+        let encoding = &vl_spec["layer"][0]["encoding"];
+        assert!(encoding["size"].is_object(), "Should have size encoding");
+        assert!(
+            encoding["fontsize"].is_null(),
+            "Should not have fontsize encoding"
+        );
+
+        // Verify scale range is linear (no area conversion)
+        let scale_range = &encoding["size"]["scale"]["range"];
+        assert!(scale_range.is_array(), "Scale should have range array");
+        let range = scale_range.as_array().unwrap();
+        assert_eq!(range.len(), 2);
+        // Should be 10 and 20 converted to pixels, NOT ~31 and ~126 (which would be area-converted)
+        // Compare with a tolerance: the values were serialised to JSON text and parsed back.
+        for (actual, points) in range.iter().zip([10.0, 20.0]) {
+            let actual = actual.as_f64().unwrap();
+            let expected = points * POINTS_TO_PIXELS;
+            assert!(
+                (actual - expected).abs() < 1e-9,
+                "expected {expected}px, got {actual}px"
+            );
+        }
+    }
+
     #[test]
     fn test_literal_color() {
         let writer = VegaLiteWriter::new();
diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js
index b1ef98db..3524dc0d 100644
--- a/tree-sitter-ggsql/grammar.js
+++ b/tree-sitter-ggsql/grammar.js
@@ -672,7 +672,7 @@ module.exports = grammar({
       // Size and shape
       'size', 'shape', 'linetype', 'linewidth', 'width', 'height',
       // Text aesthetics
-      'label', 'family', 'fontface', 'hjust', 'vjust',
+      'label', 'typeface', 'fontweight', 'italic', 'fontsize', 'hjust', 'vjust', 'rotation',
       // Specialty aesthetics,
       'coef', 'intercept',
       // Facet aesthetics
diff --git a/tree-sitter-ggsql/queries/highlights.scm b/tree-sitter-ggsql/queries/highlights.scm
index 576b19f1..b685b750 100644
--- a/tree-sitter-ggsql/queries/highlights.scm
+++ b/tree-sitter-ggsql/queries/highlights.scm
@@ -52,10 +52,13 @@
   "width"
   "height"
   "label"
-  "family"
-  "fontface"
+  "typeface"
+  "fontweight"
+  "italic"
+  "fontsize"
   "hjust"
   "vjust"
+  "rotation"
   "panel"
   "row"
   "column"