From a925781ab31d6ad4999b1711817f1119bf9b4cb9 Mon Sep 17 00:00:00 2001 From: Jiayi Wang Date: Mon, 16 Mar 2026 14:12:55 +0000 Subject: [PATCH 1/2] Fix int overflow --- .../parquet/column/statistics/BinaryStatistics.java | 6 +++--- .../parquet/column/statistics/TestStatistics.java | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java index 87d39bf16e..9488a38494 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java @@ -105,7 +105,7 @@ String stringify(Binary value) { @Override public boolean isSmallerThan(long size) { - return !hasNonNullValue() || ((min.length() + max.length()) < size); + return !hasNonNullValue() || (((long) min.length() + max.length()) < size); } public boolean isSmallerThanWithTruncation(long size, int truncationLength) { @@ -113,8 +113,8 @@ public boolean isSmallerThanWithTruncation(long size, int truncationLength) { return true; } - int minTruncateLength = Math.min(min.length(), truncationLength); - int maxTruncateLength = Math.min(max.length(), truncationLength); + long minTruncateLength = Math.min(min.length(), truncationLength); + long maxTruncateLength = Math.min(max.length(), truncationLength); return minTruncateLength + maxTruncateLength < size; } diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java index dec244f629..044b13c1b2 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java @@ -927,4 +927,16 @@ public void testNoopStatistics() { assertThrows(UnsupportedOperationException.class, stats::minAsString); assertThrows(UnsupportedOperationException.class, () -> stats.isSmallerThan(0)); } + + @Test + public void testBinaryIsSmallerThanNoOverflowForLargeValues() { + PrimitiveType type = Types.required(BINARY).named("test_binary"); + Statistics stats = Statistics.getBuilderForReading(type).build(); + + byte[] largeValue = new byte[1_073_741_824]; // 2^30 = 1 GB + stats.setMinMaxFromBytes(largeValue, largeValue); + + // min.length() + max.length() = 2^31, must not overflow int to negative + assertFalse(stats.isSmallerThan(4096)); + } } From 9d92fd6ab929eef233c547bacf8d0703051cd28f Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 17 Mar 2026 10:32:08 +0000 Subject: [PATCH 2/2] Avoid 1 GB allocation in overflow test Use Binary.fromConstantByteArray with a fake large length instead of allocating a real 1 GB byte array. The test only exercises length() arithmetic in isSmallerThan, so the backing bytes are never accessed. --- .../apache/parquet/column/statistics/TestStatistics.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java index 044b13c1b2..92eaa7a302 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java @@ -930,11 +930,10 @@ public void testNoopStatistics() { @Test public void testBinaryIsSmallerThanNoOverflowForLargeValues() { - PrimitiveType type = Types.required(BINARY).named("test_binary"); - Statistics stats = Statistics.getBuilderForReading(type).build(); - - byte[] largeValue = new byte[1_073_741_824]; // 2^30 = 1 GB - stats.setMinMaxFromBytes(largeValue, largeValue); + BinaryStatistics stats = new BinaryStatistics(); + // Create a Binary whose length() reports 2^30 without allocating 1 GB + Binary fakeLarge = Binary.fromConstantByteArray(new byte[0], 0, 1 << 30); + stats.updateStats(fakeLarge); // min.length() + max.length() = 2^31, must not overflow int to negative assertFalse(stats.isSmallerThan(4096));