diff --git a/cpp/src/arrow/util/byte_stream_split_internal.h b/cpp/src/arrow/util/byte_stream_split_internal.h index e237beb7913..2e713dd42f2 100644 --- a/cpp/src/arrow/util/byte_stream_split_internal.h +++ b/cpp/src/arrow/util/byte_stream_split_internal.h @@ -421,14 +421,28 @@ void ByteStreamSplitEncodeScalar(const uint8_t* raw_values, int width, DoSplitStreams(raw_values, kNumStreams, num_values, dest_streams.data()); } +// If changing this value, please check that TestByteStreamSplitLargeWidth still +// exercises the slow path. +constexpr inline int kByteStreamSplitMaxTemporaryAlloc = 8192; + inline void ByteStreamSplitEncodeScalarDynamic(const uint8_t* raw_values, int width, const int64_t num_values, uint8_t* out) { - ::arrow::internal::SmallVector dest_streams; - dest_streams.resize(width); - for (int stream = 0; stream < width; ++stream) { - dest_streams[stream] = &out[stream * num_values]; + if (ARROW_PREDICT_TRUE(width < kByteStreamSplitMaxTemporaryAlloc / 8)) { + ::arrow::internal::SmallVector dest_streams; + dest_streams.resize(width); + for (int stream = 0; stream < width; ++stream) { + dest_streams[stream] = &out[stream * num_values]; + } + DoSplitStreams(raw_values, width, num_values, dest_streams.data()); + } else { + // Slow path to avoid an oversized `dest_streams` container above. + for (int stream = 0; stream < width; ++stream) { + uint8_t* dest_stream = &out[stream * num_values]; + for (int64_t i = 0; i < num_values; ++i) { + dest_stream[i] = raw_values[stream + i * width]; + } + } } - DoSplitStreams(raw_values, width, num_values, dest_streams.data()); } template @@ -445,12 +459,22 @@ void ByteStreamSplitDecodeScalar(const uint8_t* data, int width, int64_t num_val inline void ByteStreamSplitDecodeScalarDynamic(const uint8_t* data, int width, int64_t num_values, int64_t stride, uint8_t* out) { - ::arrow::internal::SmallVector src_streams; - src_streams.resize(width); - for (int stream = 0; stream < width; ++stream) { - src_streams[stream] = &data[stream * stride]; + if (ARROW_PREDICT_TRUE(width < kByteStreamSplitMaxTemporaryAlloc / 8)) { + ::arrow::internal::SmallVector src_streams; + src_streams.resize(width); + for (int stream = 0; stream < width; ++stream) { + src_streams[stream] = &data[stream * stride]; + } + DoMergeStreams(src_streams.data(), width, num_values, out); + } else { + // Slow path to avoid an oversized `src_streams` container above. + for (int stream = 0; stream < width; ++stream) { + const uint8_t* src_stream = &data[stream * stride]; + for (int64_t i = 0; i < num_values; ++i) { + out[stream + i * width] = src_stream[i]; + } + } } - DoMergeStreams(src_streams.data(), width, num_values, out); } template diff --git a/cpp/src/arrow/util/byte_stream_split_test.cc b/cpp/src/arrow/util/byte_stream_split_test.cc index 13a99d937cb..be678c151fa 100644 --- a/cpp/src/arrow/util/byte_stream_split_test.cc +++ b/cpp/src/arrow/util/byte_stream_split_test.cc @@ -195,7 +195,7 @@ class TestByteStreamSplitSpecialized : public ::testing::Test { TYPED_TEST_SUITE(TestByteStreamSplitSpecialized, ByteStreamSplitTypes); TYPED_TEST(TestByteStreamSplitSpecialized, RoundtripSmall) { - for (int64_t num_values : {1, 5, 7, 12, 19, 31, 32}) { + for (int64_t num_values : {0, 1, 5, 7, 12, 19, 31, 32}) { this->TestRoundtrip(num_values); } } @@ -210,4 +210,13 @@ TYPED_TEST(TestByteStreamSplitSpecialized, PiecewiseDecode) { this->TestPiecewiseDecode(/*num_values=*/500); } +class TestByteStreamSplitLargeWidth + : public TestByteStreamSplitSpecialized> {}; + +TEST_F(TestByteStreamSplitLargeWidth, Roundtrip) { + for (int64_t num_values : {0, 1, 5, 100}) { + this->TestRoundtrip(num_values); + } +} + } // namespace arrow::util::internal diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc index 50ce510bb1f..c4d3fe5a8a5 100644 --- a/cpp/src/parquet/decoder.cc +++ b/cpp/src/parquet/decoder.cc @@ -2307,11 +2307,13 @@ class ByteStreamSplitDecoderBase : public TypedDecoderImpl { protected: int DecodeRaw(uint8_t* out_buffer, int max_values) { const int values_to_decode = std::min(this->num_values_, max_values); - ::arrow::util::internal::ByteStreamSplitDecode(this->data_, this->type_length_, - values_to_decode, stride_, out_buffer); - this->data_ += values_to_decode; - this->num_values_ -= values_to_decode; - this->len_ -= this->type_length_ * values_to_decode; + if (ARROW_PREDICT_TRUE(values_to_decode > 0)) { + ::arrow::util::internal::ByteStreamSplitDecode( + this->data_, this->type_length_, values_to_decode, stride_, out_buffer); + this->data_ += values_to_decode; + this->num_values_ -= values_to_decode; + this->len_ -= this->type_length_ * values_to_decode; + } return values_to_decode; } diff --git a/cpp/src/parquet/encoder.cc b/cpp/src/parquet/encoder.cc index 2c70f63b6b8..3e469df277b 100644 --- a/cpp/src/parquet/encoder.cc +++ b/cpp/src/parquet/encoder.cc @@ -875,10 +875,12 @@ class ByteStreamSplitEncoderBase : public EncoderImpl, return buf; } auto output_buffer = AllocateBuffer(this->memory_pool(), EstimatedDataEncodedSize()); - uint8_t* output_buffer_raw = output_buffer->mutable_data(); - const uint8_t* raw_values = sink_.data(); - ::arrow::util::internal::ByteStreamSplitEncode( - raw_values, /*width=*/byte_width_, num_values_in_buffer_, output_buffer_raw); + if (num_values_in_buffer_ > 0) { + uint8_t* output_buffer_raw = output_buffer->mutable_data(); + const uint8_t* raw_values = sink_.data(); + ::arrow::util::internal::ByteStreamSplitEncode( + raw_values, /*width=*/byte_width_, num_values_in_buffer_, output_buffer_raw); + } sink_.Reset(); num_values_in_buffer_ = 0; return output_buffer; diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index 9c88eb468a4..831829e4a21 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -1705,7 +1705,7 @@ TYPED_TEST(TestByteStreamSplitEncoding, RoundTripSpace) { for (auto null_prob : {0.001, 0.1, 0.5, 0.9, 0.999}) { // Test with both size and offset up to 3 Simd block - for (auto i = 1; i < kSimdSize * 3; i++) { + for (auto i = 0; i < kSimdSize * 3; i++) { ASSERT_NO_FATAL_FAILURE(this->ExecuteSpaced(i, 1, 0, null_prob)); ASSERT_NO_FATAL_FAILURE(this->ExecuteSpaced(i, 1, i + 1, null_prob)); } diff --git a/testing b/testing index 4aeaf00ad3e..9bf001b7d6a 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 4aeaf00ad3e726d37852e5be0d3e1bfb7ddc18f9 +Subproject commit 9bf001b7d6ad318e222c06e760bccf480f2f550c