From 72a6bfa6f9442718a48ada6d21bbc0f95006b31f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 15 May 2026 16:01:17 +0800 Subject: [PATCH 1/3] [SPARK-56874][SQL] Fast-path DateTimeUtils.daysToMicros for ZoneOffset.UTC `daysToMicros(days, zoneId)` allocates `LocalDate`, `ZonedDateTime`, and `Instant` per call. For `ZoneOffset.UTC` the answer is simply `days * MICROS_PER_DAY`, so add a reference-equality fast path that returns `Math.multiplyExact(days.toLong, MICROS_PER_DAY)`. Every real Spark caller passing UTC -- the vectorized parquet `DateToTimestampNTZUpdater` and its rebase variants, the row-based `ParquetRowConverter`, the Avro reader, and `Cast` (interpreted + codegen) -- uses the `ZoneOffset.UTC` singleton, so the fast path triggers everywhere it matters. --- .../catalyst/util/SparkDateTimeUtils.scala | 11 +++++--- .../catalyst/util/DateTimeUtilsSuite.scala | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 9ce9d14ed3161..988c8b1b51767 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -228,11 +228,16 @@ trait SparkDateTimeUtils { /** * Converts days since 1970-01-01 at the given zone ID to microseconds since 1970-01-01 - * 00:00:00Z. + * 00:00:00Z. When `zoneId eq ZoneOffset.UTC`, takes a direct-multiply fast path that + * skips the `LocalDate`/`ZonedDateTime`/`Instant` chain. */ def daysToMicros(days: Int, zoneId: ZoneId): Long = { - val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant - instantToMicros(instant) + if (zoneId eq ZoneOffset.UTC) { + Math.multiplyExact(days.toLong, MICROS_PER_DAY) + } else { + val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant + instantToMicros(instant) + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 4aa03d9f8daa0..4810ec69bb96e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -971,6 +971,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(DateTimeUtils.microsToMillis(-157700927876544L) === -157700927877L) } + test("daysToMicros: ZoneOffset.UTC fast path matches the generic zone path") { + // The UTC fast path returns `days * MICROS_PER_DAY` directly; assert it agrees with the + // `LocalDate -> ZonedDateTime -> Instant` path used for any other zone whose offset is 0 + // (e.g. `Etc/GMT`). Covers zero, positive, negative, and values bounded by the largest + // `days` for which `days * MICROS_PER_DAY` does not overflow `Long`. + val maxSafeDays = (Long.MaxValue / MICROS_PER_DAY).toInt + val cases = Seq(0, 1, -1, 365, -365, 16800, -16800, 1_000_000, -1_000_000, + maxSafeDays, -maxSafeDays) + val gmt = ZoneId.of("Etc/GMT") + cases.foreach { d => + assert(daysToMicros(d, ZoneOffset.UTC) === daysToMicros(d, gmt), + s"UTC fast path diverged from Etc/GMT path at days=$d") + assert(daysToMicros(d, ZoneOffset.UTC) === d.toLong * MICROS_PER_DAY, + s"UTC fast path != days * MICROS_PER_DAY at days=$d") + } + + // Overflow: any `days` past `maxSafeDays` overflows `Long` and must throw rather than + // silently wrap. + intercept[ArithmeticException] { + daysToMicros(maxSafeDays + 1, ZoneOffset.UTC) + } + intercept[ArithmeticException] { + daysToMicros(-maxSafeDays - 1, ZoneOffset.UTC) + } + } + test("SPARK-29012: special timestamp values") { testSpecialDatetimeValues { zoneId => val tolerance = TimeUnit.SECONDS.toMicros(30) From 0e828a4c7b6cde09fba0a20fb142e2abaadeb5c8 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 15 May 2026 19:35:48 +0800 Subject: [PATCH 2/3] [SPARK-56874][SQL][FOLLOWUP] Apply scalafmt to daysToMicros docstring --- .../apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 988c8b1b51767..9684737a22865 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -228,8 +228,8 @@ trait SparkDateTimeUtils { /** * Converts days since 1970-01-01 at the given zone ID to microseconds since 1970-01-01 - * 00:00:00Z. When `zoneId eq ZoneOffset.UTC`, takes a direct-multiply fast path that - * skips the `LocalDate`/`ZonedDateTime`/`Instant` chain. + * 00:00:00Z. When `zoneId eq ZoneOffset.UTC`, takes a direct-multiply fast path that skips the + * `LocalDate`/`ZonedDateTime`/`Instant` chain. */ def daysToMicros(days: Int, zoneId: ZoneId): Long = { if (zoneId eq ZoneOffset.UTC) { From 647bcffe13067d753e50adbf4f628c993e48d991 Mon Sep 17 00:00:00 2001 From: LuciferYang Date: Fri, 15 May 2026 12:37:05 +0000 Subject: [PATCH 3/3] Benchmark results for org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterBenchmark (JDK 17, Scala 2.13, split 1 of 1) --- .../ParquetVectorUpdaterBenchmark-results.txt | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt index 5e2c31d2b5668..31f2aca1f54da 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt @@ -2,57 +2,57 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 14526.2 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3679.3 0.3 0.3X -ShortUpdater (INT32 -> Short) 1 1 0 2054.1 0.5 0.1X -IntegerUpdater 0 0 0 10178.0 0.1 0.7X -LongUpdater 0 0 0 5054.4 0.2 0.3X -FloatUpdater 0 0 0 10212.8 0.1 0.7X -DoubleUpdater 0 0 0 5051.2 0.2 0.3X -BinaryUpdater 15 15 0 68.4 14.6 0.0X +BooleanUpdater 0 0 0 20542.2 0.0 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 3675.0 0.3 0.2X +ShortUpdater (INT32 -> Short) 1 1 0 2053.8 0.5 0.1X +IntegerUpdater 0 0 0 10229.8 0.1 0.5X +LongUpdater 0 0 0 5101.7 0.2 0.2X +FloatUpdater 0 0 0 10214.9 0.1 0.5X +DoubleUpdater 0 0 0 5106.4 0.2 0.2X +BinaryUpdater 15 15 0 68.6 14.6 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 1 1 0 1280.6 0.8 1.0X -IntegerToDoubleUpdater 1 1 0 1537.9 0.7 1.2X -FloatToDoubleUpdater 1 1 0 1418.8 0.7 1.1X -DateToTimestampNTZUpdater 36 36 0 29.5 33.9 0.0X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 2 2 0 455.3 2.2 0.4X +IntegerToLongUpdater 1 1 0 1281.5 0.8 1.0X +IntegerToDoubleUpdater 1 1 0 1532.5 0.7 1.2X +FloatToDoubleUpdater 1 1 0 1419.3 0.7 1.1X +DateToTimestampNTZUpdater 3 3 0 402.2 2.5 0.3X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 2 2 0 455.2 2.2 0.4X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 2407.3 0.4 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 1 1 0 2030.8 0.5 0.8X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 2 2 0 454.4 2.2 0.2X +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 2602.0 0.4 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 1 1 0 2077.4 0.5 0.8X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 2 2 0 454.2 2.2 0.2X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1093.1 0.9 1.0X +UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1093.5 0.9 1.0X UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 18 18 0 59.1 16.9 0.1X @@ -60,25 +60,25 @@ UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 18 18 Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 10195.9 0.1 1.0X -LongToDecimalUpdater 0 0 0 5049.2 0.2 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 51.0 19.6 0.0X +IntegerToDecimalUpdater 0 0 0 10208.9 0.1 1.0X +LongToDecimalUpdater 0 0 0 5104.2 0.2 0.5X +FixedLenByteArrayToDecimalUpdater 21 21 0 51.1 19.6 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure AMD EPYC 7763 64-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 19 19 0 54.9 18.2 1.0X +FixedLenByteArrayUpdater (len=16 -> Binary) 19 19 1 55.3 18.1 1.0X FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 160.1 6.2 2.9X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 9 9 0 123.0 8.1 2.2X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 9 9 0 123.2 8.1 2.2X