diff --git a/src/Cargo.toml b/src/Cargo.toml
index f2c85bc6..ef5e9ba8 100644
--- a/src/Cargo.toml
+++ b/src/Cargo.toml
@@ -79,7 +79,6 @@ ureq = "3"
 default = ["duckdb", "sqlite", "vegalite", "ipc", "parquet", "builtin-data"]
 ipc = ["polars/ipc"]
 duckdb = ["dep:duckdb", "dep:arrow"]
-polars-sql = ["polars/sql"]
 parquet = ["polars/parquet"]
 postgres = ["dep:postgres"]
 sqlite = ["dep:rusqlite"]
@@ -88,7 +87,7 @@ ggplot2 = []
 builtin-data = []
 python = ["dep:pyo3"]
 rest-api = ["dep:axum", "dep:tokio", "dep:tower-http", "dep:tracing", "dep:tracing-subscriber", "duckdb", "vegalite"]
-all-readers = ["duckdb", "postgres", "sqlite", "polars-sql"]
+all-readers = ["duckdb", "postgres", "sqlite"]
 all-writers = ["vegalite", "ggplot2", "plotters"]
 
 # cargo-packager configuration for cross-platform installers
diff --git a/src/cli.rs b/src/cli.rs
index b6974ad3..6ee32f4b 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -29,7 +29,7 @@ pub enum Commands {
         /// The ggsql query to execute
         query: String,
 
-        /// Data source connection string (duckdb://, sqlite://, polars://)
+        /// Data source connection string (duckdb://, sqlite://)
         #[arg(long, default_value = "duckdb://memory")]
         reader: String,
 
@@ -51,7 +51,7 @@ pub enum Commands {
         /// Path to .sql file containing ggsql query
         file: PathBuf,
 
-        /// Data source connection string (duckdb://, sqlite://, polars://)
+        /// Data source connection string (duckdb://, sqlite://)
         #[arg(long, default_value = "duckdb://memory")]
         reader: String,
 
@@ -167,23 +167,6 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option<PathBuf>, verbose: bool) {
             eprintln!("DuckDB reader not compiled in. Rebuild with --features duckdb");
             std::process::exit(1);
         }
-    } else if reader.starts_with("polars://") {
-        #[cfg(feature = "polars-sql")]
-        {
-            let r = match PolarsReader::from_connection_string(&reader) {
-                Ok(r) => r,
-                Err(e) => {
-                    eprintln!("Failed to create reader: {}", e);
-                    std::process::exit(1);
-                }
-            };
-            exec_with_reader(&query, &r, &writer, output, verbose);
-        }
-        #[cfg(not(feature = "polars-sql"))]
-        {
-            eprintln!("Polars reader not compiled in. Rebuild with --features polars-sql");
-            std::process::exit(1);
-        }
     } else if reader.starts_with("sqlite://") {
         #[cfg(feature = "sqlite")]
         {
diff --git a/src/execute/layer.rs b/src/execute/layer.rs
index 4993cf40..f41ddba9 100644
--- a/src/execute/layer.rs
+++ b/src/execute/layer.rs
@@ -31,11 +31,12 @@ pub fn layer_source_query(
     layer: &mut Layer,
     materialized_ctes: &HashSet<String>,
     has_global: bool,
+    dialect: &dyn SqlDialect,
 ) -> Result<String> {
     match &layer.source {
         Some(crate::DataSource::Annotation) => {
             // Annotation layers: process parameters and return complete VALUES clause (with on-the-fly recycling)
-            process_annotation_layer(layer)
+            process_annotation_layer(layer, dialect)
         }
         Some(crate::DataSource::Identifier(name)) => {
             // Regular table or CTE
@@ -84,6 +85,7 @@ pub fn layer_source_query(
 pub fn build_layer_select_list(
     layer: &Layer,
     type_requirements: &[TypeRequirement],
+    dialect: &dyn SqlDialect,
 ) -> Vec<String> {
     let mut select_exprs = Vec::new();
 
@@ -117,7 +119,7 @@ pub fn build_layer_select_list(
             }
             AestheticValue::Literal(lit) => {
                 // Literals become columns with prefixed aesthetic name
-                format!("{} AS \"{}\"", lit.to_sql(), aes_col_name)
+                format!("{} AS \"{}\"", lit.to_sql(dialect), aes_col_name)
             }
         };
 
@@ -304,8 +306,7 @@ pub fn apply_pre_stat_transform(
     // aesthetic_schema (aesthetic columns added by build_layer_base_query).
     // The base query produces SELECT *, col AS __ggsql_aes_x__, ... so the
     // actual SQL output has both, but they come from different schema sources.
-    // This avoids SELECT * EXCLUDE which has portability issues
-    // (Polars SQL silently drops re-added columns with the same name).
+    // This avoids SELECT * EXCLUDE, which has portability issues across SQL backends.
     let mut seen: HashSet<&str> = HashSet::new();
     let combined_cols = full_schema.iter().chain(aesthetic_schema.iter());
@@ -356,13 +357,14 @@ pub fn build_layer_base_query(
     layer: &Layer,
     source_query: &str,
     type_requirements: &[TypeRequirement],
+    dialect: &dyn SqlDialect,
 ) -> String {
     // Annotation layers now go through the same pipeline as regular layers.
     // The source_query for annotations is a VALUES clause with raw column names,
     // and this function wraps it with SELECT expressions that rename to prefixed aesthetic names.
 
     // Build SELECT list with aesthetic renames, casts
-    let select_exprs = build_layer_select_list(layer, type_requirements);
+    let select_exprs = build_layer_select_list(layer, type_requirements, dialect);
     let select_clause = if select_exprs.is_empty() {
         "*".to_string()
     } else {
@@ -603,30 +605,11 @@ where
             if stat_rename_exprs.is_empty() {
                 transformed_query
             } else {
-                // If the transformed query uses CTEs (WITH ... SELECT ...),
-                // we can't wrap it in a subquery because Polars SQL doesn't
-                // support CTEs inside subqueries. Instead, split into CTE
-                // prefix + trailing SELECT, then append the trailing SELECT
-                // as another CTE and add the rename SELECT on top.
-                if let Some((cte_prefix, trailing_select)) =
-                    crate::parser::SourceTree::new(&transformed_query)
-                        .ok()
-                        .as_ref()
-                        .and_then(super::cte::split_with_query)
-                {
-                    format!(
-                        "{}, __ggsql_stat__ AS ({}) SELECT *, {} FROM __ggsql_stat__",
-                        cte_prefix,
-                        trailing_select,
-                        stat_rename_exprs.join(", ")
-                    )
-                } else {
-                    format!(
-                        "SELECT *, {} FROM ({}) AS __ggsql_stat__",
-                        stat_rename_exprs.join(", "),
-                        transformed_query
-                    )
-                }
+                format!(
+                    "SELECT *, {} FROM ({}) AS __ggsql_stat__",
+                    stat_rename_exprs.join(", "),
+                    transformed_query
+                )
             }
         }
         StatResult::Identity => query,
@@ -659,7 +642,7 @@ where
 
 /// Build a VALUES clause for an annotation layer with all aesthetic columns.
 ///
-/// Generates SQL like: `(VALUES (val1_row1, val2_row1), (val1_row2, val2_row2)) AS t(col1, col2)`
+/// Generates SQL like: `WITH __ggsql_values__(col1, col2) AS (VALUES (...), (...)) SELECT * FROM __ggsql_values__`
 ///
 /// This function:
 /// 1. Moves positional/required/array parameters from layer.parameters to layer.mappings
@@ -679,7 +662,7 @@ where
 /// # Returns
 ///
 /// A complete SQL expression ready to use as a FROM clause
-fn process_annotation_layer(layer: &mut Layer) -> Result<String> {
+fn process_annotation_layer(layer: &mut Layer, dialect: &dyn SqlDialect) -> Result<String> {
     use crate::plot::ArrayElement;
 
     // Step 1: Identify which parameters to use for annotation data
@@ -788,14 +771,15 @@ fn process_annotation_layer(layer: &mut Layer) -> Result<String> {
     }
 
     // Step 4: Build VALUES rows
-    let mut rows = Vec::new();
-    for i in 0..max_length {
-        let values: Vec<String> = columns.iter().map(|col| col[i].to_sql()).collect();
-        rows.push(format!("({})", values.join(", ")));
-    }
+    let values_clause = (0..max_length)
+        .map(|i| {
+            let row: Vec<String> = columns.iter().map(|col| col[i].to_sql(dialect)).collect();
+            format!("({})", row.join(", "))
+        })
+        .collect::<Vec<String>>()
+        .join(", ");
 
     // Step 5: Build complete SQL query
-    let values_clause = rows.join(", ");
     let column_list = column_names
         .iter()
         .map(|c| format!("\"{}\"", c))
@@ -803,8 +787,8 @@ fn process_annotation_layer(layer: &mut Layer) -> Result<String> {
         .join(", ");
 
     let sql = format!(
-        "SELECT * FROM (VALUES {}) AS t({})",
-        values_clause, column_list
+        "WITH __ggsql_values__({}) AS (VALUES {}) SELECT * FROM __ggsql_values__",
+        column_list, values_clause
     );
 
     Ok(sql)
@@ -885,6 +869,7 @@ pub fn resolve_orientations(
 mod tests {
     use super::*;
     use crate::plot::{ArrayElement, DataSource, Geom, Layer, ParameterValue};
+    use crate::reader::AnsiDialect;
 
     #[test]
     fn test_annotation_single_scalar() {
@@ -902,12 +887,11 @@ mod tests {
             ParameterValue::String("Test".to_string()),
         );
 
-        let result = process_annotation_layer(&mut layer).unwrap();
+        let result = process_annotation_layer(&mut layer, &AnsiDialect).unwrap();
 
-        // Should produce: SELECT * FROM (VALUES (...)) AS t("pos1", "pos2", "label")
-        // Uses raw aesthetic names, not prefixed names
-        assert!(result.contains("VALUES"));
+        // Uses CTE form: WITH __ggsql_values__(cols) AS (VALUES (...)) SELECT * FROM __ggsql_values__
         // Check all values are present (order may vary due to HashMap)
+        assert!(result.contains("VALUES"));
         assert!(result.contains("5"));
         assert!(result.contains("10"));
         assert!(result.contains("'Test'"));
@@ -942,7 +926,7 @@ mod tests {
             ParameterValue::String("Same".to_string()),
        );
 
-        let result = process_annotation_layer(&mut layer).unwrap();
+        let result = process_annotation_layer(&mut layer, &AnsiDialect).unwrap();
 
         // Should recycle scalar pos2 and label to match array length (3)
         assert!(result.contains("VALUES"));
@@ -950,7 +934,7 @@ mod tests {
         assert!(result.contains("1") && result.contains("2") && result.contains("3"));
         assert!(result.contains("10"));
         assert!(result.contains("'Same'"));
-        // Check row count by counting parentheses pairs in VALUES
+        // Check row count by counting value tuples (3 rows)
         assert_eq!(result.matches("), (").count() + 1, 3, "Should have 3 rows");
     }
 
@@ -971,7 +955,7 @@ mod tests {
             ParameterValue::Array(vec![ArrayElement::Number(10.0), ArrayElement::Number(20.0)]),
         );
 
-        let result = process_annotation_layer(&mut layer);
+        let result = process_annotation_layer(&mut layer, &AnsiDialect);
 
         // Should error with mismatched lengths
         assert!(result.is_err());
@@ -1000,12 +984,13 @@ mod tests {
             ParameterValue::Array(vec![ArrayElement::Number(10.0), ArrayElement::Number(20.0)]),
         );
 
-        let result = process_annotation_layer(&mut layer).unwrap();
+        let result = process_annotation_layer(&mut layer, &AnsiDialect).unwrap();
 
         // Both arrays have length 2, should work (order may vary)
         assert!(result.contains("VALUES"));
         assert!(result.contains("1") && result.contains("2"));
         assert!(result.contains("10") && result.contains("20"));
+        // Check row count by counting value tuples (2 rows)
         assert_eq!(result.matches("), (").count() + 1, 2, "Should have 2 rows");
     }
 
@@ -1024,7 +1009,7 @@ mod tests {
             ParameterValue::String("Text".to_string()),
         );
 
-        let result = process_annotation_layer(&mut layer).unwrap();
+        let result = process_annotation_layer(&mut layer, &AnsiDialect).unwrap();
 
         // Should handle different types (order may vary)
         assert!(result.contains("VALUES"));
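
The switch to a CTE above is the portability-relevant part: the derived-table column list `FROM (VALUES ...) AS t(col1, col2)` is not accepted by every backend ggsql can target (SQLite in particular), while `WITH t(col1, col2) AS (VALUES ...)` is widely supported. A hand-written sketch of the before/after shapes for a hypothetical annotation with x = [1, 2] and label = 'A' (column order varies with mapping iteration, as the tests note):

    // Illustrative strings only; the real output comes from process_annotation_layer.
    fn main() {
        let before = r#"SELECT * FROM (VALUES (1, 'A'), (2, 'A')) AS t("x", "label")"#;
        let after = r#"WITH __ggsql_values__("x", "label") AS (VALUES (1, 'A'), (2, 'A')) SELECT * FROM __ggsql_values__"#;
        println!("before: {before}\nafter:  {after}");
    }
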
diff --git a/src/execute/mod.rs b/src/execute/mod.rs
index 604dabef..edeb8140 100644
--- a/src/execute/mod.rs
+++ b/src/execute/mod.rs
@@ -975,7 +975,7 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result
     let layer_queries: Vec<String> = specs[0]
         .layers
         .iter_mut()
-        .map(|l| layer::layer_source_query(l, &materialized_ctes, has_global_table))
+        .map(|l| layer::layer_source_query(l, &materialized_ctes, has_global_table, dialect))
         .collect::<Result<Vec<_>>>()?;
 
     // Get types for each layer from source queries (Phase 1: types only, no min/max yet)
@@ -1076,7 +1076,12 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result
-        let base_query = layer::build_layer_base_query(layer, &source_query, &type_requirements);
+        let base_query = layer::build_layer_base_query(
+            layer,
+            &source_query,
+            &type_requirements,
+            dialect,
+        );
diff --git a/src/plot.rs b/src/plot.rs
--- a/src/plot.rs
+++ b/src/plot.rs
@@ -854,38 +854,15 @@ impl Literal {
-    pub fn to_sql(&self) -> String {
+    pub fn to_sql(&self, dialect: &dyn SqlDialect) -> String {
         match self {
             Self::String(s) => format!("'{}'", s.replace('\'', "''")),
             Self::Number(n) => n.to_string(),
-            Self::Boolean(b) => {
-                if *b {
-                    "TRUE".to_string()
-                } else {
-                    "FALSE".to_string()
-                }
-            }
-            Self::Date(d) => {
-                // Convert days since epoch to DATE
-                // Use CAST to ensure result is DATE type, not TIMESTAMP
-                format!("CAST(DATE '1970-01-01' + INTERVAL {} DAY AS DATE)", d)
-            }
-            Self::DateTime(dt) => {
-                // Convert microseconds since epoch to TIMESTAMP
-                format!(
-                    "TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND",
-                    dt
-                )
-            }
-            Self::Time(t) => {
-                // Convert nanoseconds since midnight to TIME
-                let seconds = t / 1_000_000_000;
-                let nanos = t % 1_000_000_000;
-                format!(
-                    "TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND",
-                    seconds, nanos
-                )
-            }
+            Self::Boolean(b) => dialect.sql_boolean_literal(*b),
+            Self::Date(d) => dialect.sql_date_literal(*d),
+            Self::DateTime(dt) => dialect.sql_datetime_literal(*dt),
+            Self::Time(t) => dialect.sql_time_literal(*t),
             Self::Null => "NULL".to_string(),
         }
     }
@@ -923,17 +900,11 @@ impl ParameterValue {
     ///
     /// Only supports scalar values (String, Number, Boolean, Null).
     /// Arrays are handled separately in annotation layer VALUES clause generation.
-    pub fn to_sql(&self) -> String {
+    pub fn to_sql(&self, dialect: &dyn SqlDialect) -> String {
         match self {
             ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")),
             ParameterValue::Number(n) => n.to_string(),
-            ParameterValue::Boolean(b) => {
-                if *b {
-                    "TRUE".to_string()
-                } else {
-                    "FALSE".to_string()
-                }
-            }
+            ParameterValue::Boolean(b) => dialect.sql_boolean_literal(*b),
             ParameterValue::Array(_) => {
                 panic!("ParameterValue::to_sql() does not support arrays. Arrays in annotation layers should be handled via VALUES clause generation.")
             }
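
With a dialect threaded through, literal rendering dispatches to per-backend hooks instead of hard-coded ANSI strings. A minimal sketch of the new call shape (module paths are assumptions; `AnsiDialect` carries the trait defaults added in src/reader/mod.rs below):

    use ggsql::plot::Literal;       // assumed path
    use ggsql::reader::AnsiDialect; // assumed path

    fn main() {
        assert_eq!(Literal::Boolean(true).to_sql(&AnsiDialect), "TRUE");
        assert_eq!(Literal::Null.to_sql(&AnsiDialect), "NULL");
        // Date is stored as days since the Unix epoch:
        assert_eq!(
            Literal::Date(7).to_sql(&AnsiDialect),
            "CAST(DATE '1970-01-01' + INTERVAL 7 DAY AS DATE)"
        );
    }
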
diff --git a/src/reader/connection.rs b/src/reader/connection.rs
index e592d5cc..63f90cf7 100644
--- a/src/reader/connection.rs
+++ b/src/reader/connection.rs
@@ -11,8 +11,6 @@ pub enum ConnectionInfo {
     DuckDBMemory,
     /// DuckDB file-based database
     DuckDBFile(String),
-    /// Polars in-memory SQL context
-    PolarsMemory,
     /// PostgreSQL connection
     #[allow(dead_code)]
     PostgreSQL(String),
@@ -58,18 +56,6 @@ pub fn parse_connection_string(uri: &str) -> Result<ConnectionInfo> {
         return Ok(ConnectionInfo::DuckDBFile(cleaned_path.to_string()));
     }
 
-    if uri == "polars://" || uri == "polars://memory" {
-        return Ok(ConnectionInfo::PolarsMemory);
-    }
-
-    if uri.starts_with("polars://") {
-        // Polars only supports in-memory mode
-        return Err(GgsqlError::ReaderError(
-            "Polars reader only supports in-memory mode. Use 'polars://memory' or 'polars://'"
-                .to_string(),
-        ));
-    }
-
     if uri.starts_with("postgres://") || uri.starts_with("postgresql://") {
         return Ok(ConnectionInfo::PostgreSQL(uri.to_string()));
     }
@@ -85,7 +71,7 @@ pub fn parse_connection_string(uri: &str) -> Result<ConnectionInfo> {
     }
 
     Err(GgsqlError::ReaderError(format!(
-        "Unsupported connection string format: {}. Supported: duckdb://, polars://, postgres://, sqlite://",
+        "Unsupported connection string format: {}. Supported: duckdb://, postgres://, sqlite://",
         uri
     )))
 }
@@ -141,28 +127,6 @@ mod tests {
         assert_eq!(info, ConnectionInfo::SQLite("data.db".to_string()));
     }
 
-    #[test]
-    fn test_polars_memory() {
-        let info = parse_connection_string("polars://memory").unwrap();
-        assert_eq!(info, ConnectionInfo::PolarsMemory);
-    }
-
-    #[test]
-    fn test_polars_empty() {
-        let info = parse_connection_string("polars://").unwrap();
-        assert_eq!(info, ConnectionInfo::PolarsMemory);
-    }
-
-    #[test]
-    fn test_polars_file_not_supported() {
-        let result = parse_connection_string("polars://data.db");
-        assert!(result.is_err());
-        assert!(result
-            .unwrap_err()
-            .to_string()
-            .contains("only supports in-memory"));
-    }
-
     #[test]
     fn test_empty_duckdb_path() {
         let result = parse_connection_string("duckdb://");
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index b74c7755..cc27b392 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -163,6 +163,41 @@ pub trait SqlDialect {
             ))"
         )
     }
+
+    /// SQL literal for a date value (days since Unix epoch).
+    fn sql_date_literal(&self, days_since_epoch: i32) -> String {
+        format!(
+            "CAST(DATE '1970-01-01' + INTERVAL {} DAY AS DATE)",
+            days_since_epoch
+        )
+    }
+
+    /// SQL literal for a datetime value (microseconds since Unix epoch).
+    fn sql_datetime_literal(&self, microseconds_since_epoch: i64) -> String {
+        format!(
+            "TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND",
+            microseconds_since_epoch
+        )
+    }
+
+    /// SQL literal for a time value (nanoseconds since midnight).
+    fn sql_time_literal(&self, nanoseconds_since_midnight: i64) -> String {
+        let seconds = nanoseconds_since_midnight / 1_000_000_000;
+        let nanos = nanoseconds_since_midnight % 1_000_000_000;
+        format!(
+            "TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND",
+            seconds, nanos
+        )
+    }
+
+    /// SQL literal for a boolean value.
+    fn sql_boolean_literal(&self, value: bool) -> String {
+        if value {
+            "TRUE".to_string()
+        } else {
+            "FALSE".to_string()
+        }
+    }
 }
 
 pub struct AnsiDialect;
@@ -171,9 +206,6 @@ impl SqlDialect for AnsiDialect {}
 
 #[cfg(feature = "duckdb")]
 pub mod duckdb;
 
-#[cfg(feature = "polars-sql")]
-pub mod polars_sql;
-
 #[cfg(feature = "sqlite")]
 pub mod sqlite;
@@ -184,9 +216,6 @@ mod spec;
 
 #[cfg(feature = "duckdb")]
 pub use duckdb::DuckDBReader;
 
-#[cfg(feature = "polars-sql")]
-pub use polars_sql::PolarsReader;
-
 #[cfg(feature = "sqlite")]
 pub use sqlite::SqliteReader;
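
Because every hook on SqlDialect has a default body (which is why `impl SqlDialect for AnsiDialect {}` can stay empty), a backend dialect only overrides what actually differs. A minimal sketch, using a hypothetical dialect type:

    use ggsql::reader::SqlDialect; // assumed path

    struct MyBackendDialect; // hypothetical

    impl SqlDialect for MyBackendDialect {
        // This backend stores booleans as integers; the date, datetime,
        // and time hooks fall back to the ANSI defaults above.
        fn sql_boolean_literal(&self, value: bool) -> String {
            if value { "1".to_string() } else { "0".to_string() }
        }
    }
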
diff --git a/src/reader/polars_sql.rs b/src/reader/polars_sql.rs
deleted file mode 100644
index 4bf69868..00000000
--- a/src/reader/polars_sql.rs
+++ /dev/null
@@ -1,512 +0,0 @@
-//! Polars SQL context data source implementation
-//!
-//! Provides a reader that uses Polars' built-in SQL context for querying DataFrames.
-
-use crate::reader::Reader;
-use crate::{DataFrame, GgsqlError, Result};
-use polars::prelude::*;
-use polars::sql::SQLContext;
-use std::cell::RefCell;
-use std::collections::HashSet;
-
-/// Polars SQL context reader
-///
-/// Executes SQL queries against registered Polars DataFrames using Polars' built-in
-/// SQL context. This is a pure in-memory reader with no external database connection.
-///
-/// # Examples
-///
-/// ```rust,ignore
-/// use ggsql::reader::{Reader, PolarsReader};
-/// use polars::prelude::*;
-///
-/// // Create an in-memory reader
-/// let mut reader = PolarsReader::from_connection_string("polars://memory")?;
-///
-/// // Register a DataFrame
-/// let df = df! {
-///     "x" => [1, 2, 3],
-///     "y" => [10, 20, 30],
-/// }?;
-/// reader.register("data", df, false)?;
-///
-/// // Query it with SQL
-/// let result = reader.execute_sql("SELECT * FROM data WHERE x > 1")?;
-/// ```
-pub struct PolarsReader {
-    ctx: RefCell<SQLContext>,
-    registered_tables: RefCell<HashSet<String>>,
-}
-
-impl PolarsReader {
-    /// Create a new Polars reader from a connection string
-    ///
-    /// # Arguments
-    ///
-    /// * `uri` - Connection string (e.g., "polars://memory" or "polars://")
-    ///
-    /// # Returns
-    ///
-    /// A configured Polars reader with an empty SQL context
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the connection string format is invalid
-    pub fn from_connection_string(uri: &str) -> Result<Self> {
-        let conn_info = super::connection::parse_connection_string(uri)?;
-
-        match conn_info {
-            super::connection::ConnectionInfo::PolarsMemory => Ok(Self {
-                ctx: RefCell::new(SQLContext::new()),
-                registered_tables: RefCell::new(HashSet::new()),
-            }),
-            _ => Err(GgsqlError::ReaderError(format!(
-                "Connection string '{}' is not supported by PolarsReader",
-                uri
-            ))),
-        }
-    }
-
-    /// Create a new Polars reader with default settings
-    ///
-    /// Equivalent to `from_connection_string("polars://memory")`
-    pub fn new() -> Self {
-        Self {
-            ctx: RefCell::new(SQLContext::new()),
-            registered_tables: RefCell::new(HashSet::new()),
-        }
-    }
-
-    /// Check if a table is registered
-    fn table_exists(&self, name: &str) -> bool {
-        self.registered_tables.borrow().contains(name)
-    }
-
-    /// List registered table names
-    ///
-    /// When `internal` is false, filters out internal tables (prefixed with `__ggsql_`).
-    pub fn list_tables(&self, internal: bool) -> Vec<String> {
-        self.registered_tables
-            .borrow()
-            .iter()
-            .filter(|name| internal || !name.starts_with("__ggsql_"))
-            .cloned()
-            .collect()
-    }
-}
-
-impl Default for PolarsReader {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Validate a table name
-fn validate_table_name(name: &str) -> Result<()> {
-    if name.is_empty() {
-        return Err(GgsqlError::ReaderError("Table name cannot be empty".into()));
-    }
-
-    // Reject characters that could break identifiers or cause issues
-    let forbidden = ['"', '\0', '\n', '\r'];
-    for ch in forbidden {
-        if name.contains(ch) {
-            return Err(GgsqlError::ReaderError(format!(
-                "Table name '{}' contains invalid character '{}'",
-                name,
-                ch.escape_default()
-            )));
-        }
-    }
-
-    // Reasonable length limit
-    if name.len() > 128 {
-        return Err(GgsqlError::ReaderError(format!(
-            "Table name '{}' exceeds maximum length of 128 characters",
-            name
-        )));
-    }
-
-    Ok(())
-}
-
-impl Reader for PolarsReader {
-    fn execute_sql(&self, sql: &str) -> Result<DataFrame> {
-        // Check if this is a DDL statement - Polars SQL context doesn't support DDL
-        let trimmed = sql.trim().to_uppercase();
-        let is_ddl = trimmed.starts_with("CREATE ")
-            || trimmed.starts_with("DROP ")
-            || trimmed.starts_with("INSERT ")
-            || trimmed.starts_with("UPDATE ")
-            || trimmed.starts_with("DELETE ")
-            || trimmed.starts_with("ALTER ");
-
-        if is_ddl {
-            return Err(GgsqlError::ReaderError(
-                format!("Polars SQL context does not support DDL statements. Use register() to add tables. {}", sql)
-            ));
-        }
-
-        // Handle ggsql:name namespaced identifiers (builtin datasets)
-        #[cfg(feature = "builtin-data")]
-        {
-            let dataset_names = super::data::extract_builtin_dataset_names(sql)?;
-            for name in &dataset_names {
-                let table_name = crate::naming::builtin_data_table(name);
-                if !self.table_exists(&table_name) {
-                    let df = super::data::load_builtin_dataframe(name)?;
-                    self.register(&table_name, df, true)?;
-                }
-            }
-        }
-
-        // Rewrite ggsql:name → __ggsql_data_name__ in SQL
-        let sql = super::data::rewrite_namespaced_sql(sql)?;
-
-        // Execute the query - this returns a LazyFrame
-        let lazy_frame = self.ctx.borrow_mut().execute(&sql).map_err(|e| {
-            GgsqlError::ReaderError(format!("Failed to execute SQL `{}`: {}", sql, e))
-        })?;
-
-        // Collect the LazyFrame into a DataFrame
-        let df = lazy_frame.collect().map_err(|e| {
-            GgsqlError::ReaderError(format!("Failed to collect query result: {}", e))
-        })?;
-
-        Ok(df)
-    }
-
-    fn register(&self, name: &str, df: DataFrame, replace: bool) -> Result<()> {
-        // Validate table name
-        validate_table_name(name)?;
-
-        // Handle existing table
-        if self.table_exists(name) {
-            if replace {
-                // Unregister existing table first
-                self.ctx.borrow_mut().unregister(name);
-                self.registered_tables.borrow_mut().remove(name);
-            } else {
-                return Err(GgsqlError::ReaderError(format!(
-                    "Table '{}' already exists",
-                    name
-                )));
-            }
-        }
-
-        // Register the DataFrame with the SQL context
-        // Polars SQLContext takes a LazyFrame
-        self.ctx.borrow_mut().register(name, df.lazy());
-
-        // Track the table so we can unregister it later
-        self.registered_tables.borrow_mut().insert(name.to_string());
-
-        Ok(())
-    }
-
-    fn unregister(&self, name: &str) -> Result<()> {
-        // Only allow unregistering tables we created via register()
-        if !self.registered_tables.borrow().contains(name) {
-            return Err(GgsqlError::ReaderError(format!(
-                "Table '{}' was not registered via this reader",
-                name
-            )));
-        }
-
-        // Unregister from the SQL context
-        self.ctx.borrow_mut().unregister(name);
-
-        // Remove from tracking
-        self.registered_tables.borrow_mut().remove(name);
-
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_create_reader() {
-        let reader = PolarsReader::from_connection_string("polars://memory");
-        assert!(reader.is_ok());
-    }
-
-    #[test]
-    fn test_create_reader_default() {
-        let _reader = PolarsReader::new();
-    }
-
-    #[test]
-    fn test_register_and_query() {
-        let reader = PolarsReader::new();
-
-        let df = df! {
-            "x" => [1i32, 2, 3],
-            "y" => [10i32, 20, 30],
-        }
-        .unwrap();
-
-        reader.register("my_table", df, false).unwrap();
-
-        let result = reader
-            .execute_sql("SELECT * FROM my_table ORDER BY x")
-            .unwrap();
-        assert_eq!(result.shape(), (3, 2));
-        assert_eq!(result.get_column_names(), vec!["x", "y"]);
-    }
-
-    #[test]
-    fn test_register_and_filter() {
-        let reader = PolarsReader::new();
-
-        let df = df! {
-            "x" => [1i32, 2, 3, 4, 5],
-            "y" => [10i32, 20, 30, 40, 50],
-        }
-        .unwrap();
-
-        reader.register("data", df, false).unwrap();
-
-        let result = reader
-            .execute_sql("SELECT * FROM data WHERE x > 2")
-            .unwrap();
-        assert_eq!(result.height(), 3);
-    }
-
-    #[test]
-    fn test_register_duplicate_name_errors() {
-        let reader = PolarsReader::new();
-
-        let df1 = df! { "a" => [1i32] }.unwrap();
-        let df2 = df! { "b" => [2i32] }.unwrap();
-
-        // First registration should succeed
-        reader.register("dup_table", df1, false).unwrap();
-
-        // Second registration with same name should fail (when replace=false)
-        let result = reader.register("dup_table", df2, false);
-        assert!(result.is_err());
-        let err = result.unwrap_err().to_string();
-        assert!(err.contains("already exists"));
-    }
-
-    #[test]
-    fn test_register_invalid_table_names() {
-        let reader = PolarsReader::new();
-        let df = df! { "a" => [1i32] }.unwrap();
-
-        // Empty name
-        let result = reader.register("", df.clone(), false);
-        assert!(result.is_err());
-        assert!(result.unwrap_err().to_string().contains("cannot be empty"));
-
-        // Name with double quote
-        let result = reader.register("bad\"name", df.clone(), false);
-        assert!(result.is_err());
-        assert!(result
-            .unwrap_err()
-            .to_string()
-            .contains("invalid character"));
-
-        // Name with null byte
-        let result = reader.register("bad\0name", df.clone(), false);
-        assert!(result.is_err());
-        assert!(result
-            .unwrap_err()
-            .to_string()
-            .contains("invalid character"));
-
-        // Name too long
-        let long_name = "a".repeat(200);
-        let result = reader.register(&long_name, df, false);
-        assert!(result.is_err());
-        assert!(result
-            .unwrap_err()
-            .to_string()
-            .contains("exceeds maximum length"));
-    }
-
-    #[test]
-    fn test_unregister() {
-        let reader = PolarsReader::new();
-        let df = df! { "x" => [1i32, 2, 3] }.unwrap();
-
-        reader.register("test_data", df, false).unwrap();
-
-        // Should be queryable
-        let result = reader.execute_sql("SELECT * FROM test_data").unwrap();
-        assert_eq!(result.height(), 3);
-
-        // Unregister
-        reader.unregister("test_data").unwrap();
-
-        // Should no longer exist
-        let result = reader.execute_sql("SELECT * FROM test_data");
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_unregister_not_registered() {
-        let reader = PolarsReader::new();
-
-        // Should fail - we didn't register anything
-        let result = reader.unregister("nonexistent");
-        assert!(result.is_err());
-        let err = result.unwrap_err().to_string();
-        assert!(err.contains("was not registered via this reader"));
-    }
-
-    #[test]
-    fn test_reregister_after_unregister() {
-        let reader = PolarsReader::new();
-        let df = df! { "x" => [1i32, 2, 3] }.unwrap();
-
-        reader.register("data", df.clone(), false).unwrap();
-        reader.unregister("data").unwrap();
-
-        // Should be able to register again
-        reader.register("data", df, false).unwrap();
-        let result = reader.execute_sql("SELECT * FROM data").unwrap();
-        assert_eq!(result.height(), 3);
-    }
-
-    #[test]
-    fn test_invalid_sql() {
-        let reader = PolarsReader::new();
-        let result = reader.execute_sql("INVALID SQL SYNTAX");
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_ddl_not_supported() {
-        let reader = PolarsReader::new();
-
-        // CREATE should fail
-        let result = reader.execute_sql("CREATE TABLE test (x INT)");
-        assert!(result.is_err());
-        assert!(result.unwrap_err().to_string().contains("DDL"));
-
-        // DROP should fail
-        let result = reader.execute_sql("DROP TABLE test");
-        assert!(result.is_err());
-
-        // INSERT should fail
-        let result = reader.execute_sql("INSERT INTO test VALUES (1)");
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_query_with_aggregation() {
-        let reader = PolarsReader::new();
-
-        let df = df! {
-            "region" => ["US", "US", "EU"],
-            "revenue" => [100.0f64, 200.0, 150.0],
-        }
-        .unwrap();
-
-        reader.register("sales", df, false).unwrap();
-
-        let result = reader
-            .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region")
-            .unwrap();
-
-        assert_eq!(result.shape(), (2, 2));
-        assert_eq!(result.get_column_names(), vec!["region", "total"]);
-    }
-
-    #[test]
-    fn test_multiple_tables() {
-        let reader = PolarsReader::new();
-
-        let sales = df! {
-            "id" => [1i32, 2, 3],
-            "amount" => [100i32, 200, 300],
-            "product_id" => [1i32, 1, 2],
-        }
-        .unwrap();
-
-        let products = df! {
-            "id" => [1i32, 2],
-            "name" => ["Widget", "Gadget"],
-        }
-        .unwrap();
-
-        reader.register("sales", sales, false).unwrap();
-        reader.register("products", products, false).unwrap();
-
-        let result = reader
-            .execute_sql(
-                "SELECT s.id, s.amount, p.name
-                 FROM sales s
-                 JOIN products p ON s.product_id = p.id",
-            )
-            .unwrap();
-
-        assert_eq!(result.height(), 3);
-    }
-
-    #[test]
-    fn test_namespaced_sql_with_preregistered_data() {
-        use crate::naming;
-
-        let reader = PolarsReader::new();
-
-        let df = df! {
-            "x" => [1i32, 2, 3],
-            "y" => [10i32, 20, 30],
-        }
-        .unwrap();
-
-        // Register under the internal table name that ggsql:penguins rewrites to
-        let table_name = naming::builtin_data_table("penguins");
-        reader.register(&table_name, df, false).unwrap();
-
-        // ggsql:penguins should be rewritten to __ggsql_data_penguins__ and resolve
-        let result = reader.execute_sql("SELECT * FROM ggsql:penguins").unwrap();
-        assert_eq!(result.height(), 3);
-    }
-
-    #[test]
-    fn test_namespaced_sql_without_registration_errors() {
-        let reader = PolarsReader::new();
-
-        // Without builtin-data feature and without pre-registration, should error
-        // (when builtin-data is enabled, this test still passes because
-        // the dataset gets auto-loaded)
-        let result = reader.execute_sql("SELECT * FROM ggsql:unknown_dataset");
-        // Either errors from "not pre-loaded" or from SQL execution failing
-        assert!(result.is_err());
-    }
-}
-
-#[cfg(feature = "builtin-data")]
-#[cfg(test)]
-mod builtin_data_tests {
-    use super::*;
-
-    #[test]
-    fn test_builtin_penguins_auto_loads() {
-        let reader = PolarsReader::new();
-
-        // ggsql:penguins should auto-load from embedded parquet
-        let result = reader
-            .execute_sql("SELECT * FROM ggsql:penguins LIMIT 5")
-            .unwrap();
-        assert_eq!(result.height(), 5);
-        assert!(result.width() > 0);
-    }
-
-    #[test]
-    fn test_builtin_airquality_auto_loads() {
-        let reader = PolarsReader::new();
-
-        let result = reader
-            .execute_sql("SELECT * FROM ggsql:airquality LIMIT 5")
-            .unwrap();
-        assert_eq!(result.height(), 5);
-        assert!(result.width() > 0);
-    }
-}
diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs
index 09d8b015..503da0cc 100644
--- a/src/reader/sqlite.rs
+++ b/src/reader/sqlite.rs
@@ -45,6 +45,28 @@ impl super::SqlDialect for SqliteDialect {
     fn time_type_name(&self) -> Option<&str> {
         Some("TEXT")
     }
+
+    fn sql_date_literal(&self, days_since_epoch: i32) -> String {
+        format!("date('1970-01-01', '+{} days')", days_since_epoch)
+    }
+
+    fn sql_datetime_literal(&self, microseconds_since_epoch: i64) -> String {
+        let seconds = microseconds_since_epoch as f64 / 1_000_000.0;
+        format!("datetime('1970-01-01 00:00:00', '+{} seconds')", seconds)
+    }
+
+    fn sql_time_literal(&self, nanoseconds_since_midnight: i64) -> String {
+        let seconds = nanoseconds_since_midnight as f64 / 1_000_000_000.0;
+        format!("time('00:00:00', '+{} seconds')", seconds)
+    }
+
+    fn sql_boolean_literal(&self, value: bool) -> String {
+        if value {
+            "1".to_string()
+        } else {
+            "0".to_string()
+        }
+    }
 }
 
 /// SQLite database reader
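
Hand-computed expectations for the SQLite overrides above, next to the ANSI defaults (illustrative asserts, not tests from this patch; assumes SqliteDialect and AnsiDialect are unit structs with the SqlDialect trait in scope):

    assert_eq!(AnsiDialect.sql_boolean_literal(true), "TRUE");
    assert_eq!(SqliteDialect.sql_boolean_literal(true), "1");
    assert_eq!(SqliteDialect.sql_date_literal(3), "date('1970-01-01', '+3 days')");
    // 1_500_000 microseconds -> 1.5 seconds; f64 division keeps the fraction.
    assert_eq!(
        SqliteDialect.sql_datetime_literal(1_500_000),
        "datetime('1970-01-01 00:00:00', '+1.5 seconds')"
    );
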