Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 5 additions & 7 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::{
};
use datafusion::common::instant::Instant;
use datafusion::common::{plan_datafusion_err, plan_err};
use datafusion::config::ConfigFileType;
use datafusion::config::{ConfigFileType, Dialect};
use datafusion::datasource::listing::ListingTableUrl;
use datafusion::error::{DataFusionError, Result};
use datafusion::execution::memory_pool::MemoryConsumer;
Expand Down Expand Up @@ -223,9 +223,8 @@ pub(super) async fn exec_and_print(
let dialect = &options.sql_parser.dialect;
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
"Unsupported SQL dialect: {dialect}. Available dialects: \
Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
"Unsupported SQL dialect: {dialect}. Available dialects: {}.",
Dialect::AVAILABLE
)
})?;

Expand Down Expand Up @@ -613,9 +612,8 @@ mod tests {
let dialect = &task_ctx.session_config().options().sql_parser.dialect;
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
"Unsupported SQL dialect: {dialect}. Available dialects: \
Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
"Unsupported SQL dialect: {dialect}. Available dialects: {}.",
Dialect::AVAILABLE
)
})?;
for location in locations {
Expand Down
31 changes: 26 additions & 5 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ config_namespace! {
pub enable_options_value_normalization: bool, warn = "`enable_options_value_normalization` is deprecated and ignored", default = false

/// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks and Spark.
pub dialect: Dialect, default = Dialect::Generic
// no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive

Expand Down Expand Up @@ -342,6 +342,13 @@ pub enum Dialect {
Ansi,
DuckDB,
Databricks,
Spark,
}

impl Dialect {
/// List of all supported dialect names, for use in error messages.
pub const AVAILABLE: &'static str = "Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks, Spark";
}

impl AsRef<str> for Dialect {
Expand All @@ -360,6 +367,7 @@ impl AsRef<str> for Dialect {
Self::Ansi => "ansi",
Self::DuckDB => "duckdb",
Self::Databricks => "databricks",
Self::Spark => "spark",
}
}
}
Expand All @@ -382,11 +390,12 @@ impl FromStr for Dialect {
"ansi" => Self::Ansi,
"duckdb" => Self::DuckDB,
"databricks" => Self::Databricks,
"spark" | "sparksql" => Self::Spark,
other => {
let error_message = format!(
"Invalid Dialect: {other}. Expected one of: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks"
);
return Err(DataFusionError::Configuration(error_message));
return Err(DataFusionError::Configuration(format!(
"Invalid Dialect: {other}. Expected one of: {}",
Self::AVAILABLE
)));
}
};
Ok(value)
Expand Down Expand Up @@ -4090,4 +4099,16 @@ mod tests {
assert_eq!(cdc.max_chunk_size, 1024 * 1024);
assert_eq!(cdc.norm_level, 0);
}

#[test]
fn test_dialect_spark_roundtrip() {
use crate::config::Dialect;
use std::str::FromStr;

assert_eq!(Dialect::from_str("spark").unwrap(), Dialect::Spark);
assert_eq!(Dialect::from_str("sparksql").unwrap(), Dialect::Spark);
assert_eq!(Dialect::from_str("SPARK").unwrap(), Dialect::Spark);
assert_eq!(Dialect::Spark.as_ref(), "spark");
assert_eq!(Dialect::Spark.to_string(), "spark");
}
}
10 changes: 4 additions & 6 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,8 @@ impl SessionState {
) -> datafusion_common::Result<Statement> {
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
"Unsupported SQL dialect: {dialect}. Available dialects: \
Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
"Unsupported SQL dialect: {dialect}. Available dialects: {}.",
Dialect::AVAILABLE
)
})?;

Expand Down Expand Up @@ -486,9 +485,8 @@ impl SessionState {
) -> datafusion_common::Result<SQLExprWithAlias> {
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
"Unsupported SQL dialect: {dialect}. Available dialects: \
Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
"Unsupported SQL dialect: {dialect}. Available dialects: {}.",
Dialect::AVAILABLE
)
})?;

Expand Down
3 changes: 2 additions & 1 deletion datafusion/spark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ url = { workspace = true }
arrow = { workspace = true, features = ["test_utils"] }
criterion = { workspace = true }
# for SessionStateBuilderSpark tests
datafusion = { workspace = true, default-features = false }
datafusion = { workspace = true, default-features = false, features = ["sql"] }
tokio = { workspace = true, features = ["rt"] }

[[bench]]
harness = false
Expand Down
26 changes: 26 additions & 0 deletions datafusion/spark/src/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ impl SessionStateBuilderSpark for SessionStateBuilder {
#[cfg(test)]
mod tests {
use super::*;
use datafusion::common::config::Dialect;
use datafusion::prelude::SessionConfig;
use datafusion::prelude::SessionContext;

#[test]
fn test_session_state_with_spark_features() {
Expand All @@ -108,4 +111,27 @@ mod tests {
"Apache Spark expr planners should be registered"
);
}

#[tokio::test]
async fn test_spark_dialect_with_spark_functions() {
let mut config = SessionConfig::new();
config.options_mut().sql_parser.dialect = Dialect::Spark;
let state = SessionStateBuilder::new()
.with_config(config)
.with_default_features()
.with_spark_features()
.build();
let ctx = SessionContext::new_with_state(state);

// Spark function + Spark dialect parsing
let result = ctx
.sql("SELECT sha2('abc', 256)")
.await
.unwrap()
.collect()
.await
.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].num_rows(), 1);
}
}
4 changes: 4 additions & 0 deletions datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use arrow::record_batch::RecordBatch;
use datafusion::catalog::{
CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, Session,
};
use datafusion::common::config::Dialect;
use datafusion::common::{DataFusionError, Result, not_impl_err};
use datafusion::functions::math::abs;
use datafusion::logical_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl};
Expand Down Expand Up @@ -112,6 +113,9 @@ impl TestContext {

if is_spark_path(relative_path) {
state_builder = state_builder.with_spark_features();
if let Some(config) = state_builder.config() {
config.options_mut().sql_parser.dialect = Dialect::Spark;
}
}

if matches!(
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file metad
datafusion.runtime.temp_directory NULL The path to the temporary file directory.
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks and Spark.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.enable_subquery_sort_elimination true When set to true, DataFusion may remove `ORDER BY` clauses from subqueries or CTEs during SQL planning when their ordering cannot affect the result, such as when no `LIMIT` or other order-sensitive operator depends on them. Disable this option to preserve explicit subquery ordering in the planned query.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ SELECT size(make_array(1, NULL, 3));

# NULL array returns -1 (Spark behavior)
query I
SELECT size(NULL::int[]);
SELECT size(CAST(NULL AS ARRAY<INT>));
----
-1

Expand Down
5 changes: 5 additions & 0 deletions docs/source/library-user-guide/upgrading/54.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,8 @@ match register_function {
RegisterFunction::Table(name, table) => {},
}
```

### New `Dialect::Spark` variant

The `Dialect` enum in `datafusion_common::config` now includes a `Spark` variant.
If you match exhaustively on `Dialect`, add a `Dialect::Spark` arm.
Loading
Loading