From 222bb16fc802d8edcef115d5d4515efdc6d608ce Mon Sep 17 00:00:00 2001 From: Udayan Mahalwar Date: Fri, 29 May 2026 10:22:13 +0530 Subject: [PATCH 1/5] GH-49904: [C++] Deprecate RandomAccessFile legacy ReadAt and ReadAsync --- cpp/src/arrow/adapters/orc/adapter.cc | 2 +- cpp/src/arrow/buffer_test.cc | 2 +- cpp/src/arrow/io/file_test.cc | 4 ++-- cpp/src/arrow/io/interfaces.cc | 8 ++++++-- cpp/src/arrow/io/interfaces.h | 5 ++++- cpp/src/arrow/io/slow.cc | 4 ++-- cpp/src/arrow/io/test_common.cc | 2 +- cpp/src/arrow/ipc/message.cc | 2 +- 8 files changed, 18 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 51cca497485c..530668afa57a 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -113,7 +113,7 @@ class ArrowInputFile : public liborc::InputStream { uint64_t getNaturalReadSize() const override { return 128 * 1024; } void read(void* buf, uint64_t length, uint64_t offset) override { - ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, buf)); + ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, /*allow_short_read=*/true , buf)); if (static_cast(bytes_read) != length) { throw liborc::ParseError("Short read from arrow input file"); diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 4dd210076ed1..5a728d8341b0 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -616,7 +616,7 @@ TEST(TestBuffer, GetReader) { auto buf = std::make_shared(data, data_str.size()); ASSERT_OK_AND_ASSIGN(auto reader, Buffer::GetReader(buf)); ASSERT_OK_AND_EQ(static_cast(data_str.size()), reader->GetSize()); - ASSERT_OK_AND_ASSIGN(auto read_buf, reader->ReadAt(5, 4)); + ASSERT_OK_AND_ASSIGN(auto read_buf, reader->ReadAt(5, 4,/*allow_short_read=*/true)); AssertBufferEqual(*read_buf, "data"); } diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index 1e2de3f07a5d..9bf694859b55 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -400,8 +400,8 @@ TEST_F(TestReadableFile, ReadAsync) { MakeTestFile(); OpenFile(); - auto fut1 = file_->ReadAsync(default_io_context(), 1, 10); - auto fut2 = file_->ReadAsync(default_io_context(), 0, 4); + auto fut1 = file_->ReadAsync(default_io_context(), 1, 10, /*allow_short_read=*/true); + auto fut2 = file_->ReadAsync(default_io_context(), 0, 4, /*allow_short_read=*/true); auto fut3 = file_->ReadAsync(default_io_context(), 1, 10, /*allow_short_read=*/false); auto fut4 = file_->ReadAsync(default_io_context(), 0, 4, /*allow_short_read=*/false); ASSERT_OK_AND_ASSIGN(auto buf1, fut1.result()); diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 41a7fdecb229..0085267b7963 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -151,7 +151,9 @@ RandomAccessFile::RandomAccessFile() : interface_impl_(new Impl()) {} Result RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, bool allow_short_read, void* out) { + ARROW_SUPPRESS_DEPRECATION_WARNING ARROW_ASSIGN_OR_RAISE(auto real_nbytes, ReadAt(position, nbytes, out)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING if (!allow_short_read && real_nbytes != nbytes) { return Status::IOError("File too short: expected to be able to read ", nbytes, " bytes, got ", real_nbytes); @@ -167,7 +169,9 @@ Result RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* Result> RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, bool allow_short_read) { + ARROW_SUPPRESS_DEPRECATION_WARNING ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(position, nbytes)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING // XXX the internal `IoRecordedRandomAccessFile` can return a null buffer if (!allow_short_read && buffer && buffer->size() != nbytes) { return Status::IOError("File too short: expected to be able to read ", nbytes, @@ -277,7 +281,7 @@ class FileSegmentReader RETURN_NOT_OK(CheckOpen()); int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_); ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, - file_->ReadAt(file_offset_ + position_, bytes_to_read, out)); + file_->ReadAt(file_offset_ + position_, bytes_to_read,/*allow_short_read=*/true, out)); position_ += bytes_read; return bytes_read; } @@ -286,7 +290,7 @@ class FileSegmentReader RETURN_NOT_OK(CheckOpen()); int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_); ARROW_ASSIGN_OR_RAISE(auto buffer, - file_->ReadAt(file_offset_ + position_, bytes_to_read)); + file_->ReadAt(file_offset_ + position_, bytes_to_read , /*allow_short_read=*/true)); position_ += buffer->size(); return buffer; } diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index 8cb982470715..610754a9b49d 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -29,7 +29,6 @@ #include "arrow/util/macros.h" #include "arrow/util/type_fwd.h" #include "arrow/util/visibility.h" - namespace arrow { namespace io { @@ -295,6 +294,7 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] nbytes The number of bytes to read /// \param[out] out The buffer to read bytes into /// \return The number of bytes read, or an error + ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") virtual Result ReadAt(int64_t position, int64_t nbytes, void* out); /// \brief Read data from given file position. @@ -318,18 +318,21 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] position Where to read bytes from /// \param[in] nbytes The number of bytes to read /// \return A buffer containing the bytes read, or an error + ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") virtual Result> ReadAt(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously. virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes, bool allow_short_read); + ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext. Future> ReadAsync(int64_t position, int64_t nbytes, bool allow_short_read); + ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") Future> ReadAsync(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Explicit multi-read. diff --git a/cpp/src/arrow/io/slow.cc b/cpp/src/arrow/io/slow.cc index 44a50c5c25e6..f637330f463a 100644 --- a/cpp/src/arrow/io/slow.cc +++ b/cpp/src/arrow/io/slow.cc @@ -131,7 +131,7 @@ Result> SlowRandomAccessFile::Read(int64_t nbytes) { Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* out) { latencies_->Sleep(); - return stream_->ReadAt(position, nbytes, out); + return stream_->ReadAt(position, nbytes,/*allow_short_read=*/true ,out); } Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, @@ -143,7 +143,7 @@ Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, Result> SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes) { latencies_->Sleep(); - return stream_->ReadAt(position, nbytes); + return stream_->ReadAt(position, nbytes , /*allow_short_read=*/true); } Result> SlowRandomAccessFile::ReadAt(int64_t position, diff --git a/cpp/src/arrow/io/test_common.cc b/cpp/src/arrow/io/test_common.cc index f3f5073fd67e..44bdb3da19f7 100644 --- a/cpp/src/arrow/io/test_common.cc +++ b/cpp/src/arrow/io/test_common.cc @@ -133,7 +133,7 @@ class TrackedRandomAccessFileImpl : public TrackedRandomAccessFile { Result GetSize() override { return delegate_->GetSize(); } Result ReadAt(int64_t position, int64_t nbytes, void* out) override { SaveReadRange(position, nbytes); - return delegate_->ReadAt(position, nbytes, out); + return delegate_->ReadAt(position, nbytes , /*allow_short_read=*/true ,out); } Result> ReadAt(int64_t position, int64_t nbytes) override { return ReadAt(position, nbytes, /*allow_short_read=*/true); diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index 1fef961ff8b3..e4614af6f000 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -305,7 +305,7 @@ Status ReadFieldsSubset(int64_t offset, int32_t metadata_length, RETURN_NOT_OK(fields_loader(batch, &io_recorded_random_access_file)); const auto& read_ranges = io_recorded_random_access_file.GetReadRanges(); for (const auto& range : read_ranges) { - auto read_result = file->ReadAt(offset + metadata_length + range.offset, range.length, + auto read_result = file->ReadAt(offset + metadata_length + range.offset, range.length,/*allow_short_read=*/true, body->mutable_data() + range.offset); if (!read_result.ok()) { return Status::IOError("Failed to read message body, error ", From f8cc3689af5bfe37fd364c3f06eaeeb9d75e5019 Mon Sep 17 00:00:00 2001 From: Udayan Mahalwar Date: Sun, 31 May 2026 22:35:32 +0530 Subject: [PATCH 2/5] Address review comments and update allow_short_read in remaining files --- cpp/src/arrow/io/interfaces.cc | 2 +- cpp/src/arrow/io/interfaces.h | 9 +++++---- cpp/src/arrow/io/slow.cc | 4 ++-- cpp/src/arrow/ipc/message.cc | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 0085267b7963..6029daeface6 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -281,7 +281,7 @@ class FileSegmentReader RETURN_NOT_OK(CheckOpen()); int64_t bytes_to_read = std::min(nbytes, nbytes_ - position_); ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, - file_->ReadAt(file_offset_ + position_, bytes_to_read,/*allow_short_read=*/true, out)); + file_->ReadAt(file_offset_ + position_, bytes_to_read, /*allow_short_read=*/true, out)); position_ += bytes_read; return bytes_read; } diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index 610754a9b49d..af4a7eaddde5 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -29,6 +29,7 @@ #include "arrow/util/macros.h" #include "arrow/util/type_fwd.h" #include "arrow/util/visibility.h" + namespace arrow { namespace io { @@ -294,7 +295,7 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] nbytes The number of bytes to read /// \param[out] out The buffer to read bytes into /// \return The number of bytes read, or an error - ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") virtual Result ReadAt(int64_t position, int64_t nbytes, void* out); /// \brief Read data from given file position. @@ -318,21 +319,21 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] position Where to read bytes from /// \param[in] nbytes The number of bytes to read /// \return A buffer containing the bytes read, or an error - ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") virtual Result> ReadAt(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously. virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes, bool allow_short_read); - ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext. Future> ReadAsync(int64_t position, int64_t nbytes, bool allow_short_read); - ARROW_DEPRECATED("Deprecated in 17.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") Future> ReadAsync(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Explicit multi-read. diff --git a/cpp/src/arrow/io/slow.cc b/cpp/src/arrow/io/slow.cc index f637330f463a..ca638505e1e4 100644 --- a/cpp/src/arrow/io/slow.cc +++ b/cpp/src/arrow/io/slow.cc @@ -131,7 +131,7 @@ Result> SlowRandomAccessFile::Read(int64_t nbytes) { Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* out) { latencies_->Sleep(); - return stream_->ReadAt(position, nbytes,/*allow_short_read=*/true ,out); + return stream_->ReadAt(position, nbytes, /*allow_short_read=*/true, out); } Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, @@ -143,7 +143,7 @@ Result SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes, Result> SlowRandomAccessFile::ReadAt(int64_t position, int64_t nbytes) { latencies_->Sleep(); - return stream_->ReadAt(position, nbytes , /*allow_short_read=*/true); + return stream_->ReadAt(position, nbytes, /*allow_short_read=*/true); } Result> SlowRandomAccessFile::ReadAt(int64_t position, diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index e4614af6f000..a5f65955e420 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -305,7 +305,7 @@ Status ReadFieldsSubset(int64_t offset, int32_t metadata_length, RETURN_NOT_OK(fields_loader(batch, &io_recorded_random_access_file)); const auto& read_ranges = io_recorded_random_access_file.GetReadRanges(); for (const auto& range : read_ranges) { - auto read_result = file->ReadAt(offset + metadata_length + range.offset, range.length,/*allow_short_read=*/true, + auto read_result = file->ReadAt(offset + metadata_length + range.offset, range.length, /*allow_short_read=*/true, body->mutable_data() + range.offset); if (!read_result.ok()) { return Status::IOError("Failed to read message body, error ", From eb798d2b57f4998c3e83987fed9b1384577cc325 Mon Sep 17 00:00:00 2001 From: Udayan Mahalwar Date: Sun, 31 May 2026 23:16:48 +0530 Subject: [PATCH 3/5] Update test_util.cc to pass allow_short_read to ReadAt --- cpp/src/arrow/filesystem/test_util.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc index da73a8ec16b5..5b8c555b5539 100644 --- a/cpp/src/arrow/filesystem/test_util.cc +++ b/cpp/src/arrow/filesystem/test_util.cc @@ -1157,11 +1157,11 @@ void GenericFileSystemTest::TestOpenInputFile(FileSystem* fs) { ASSERT_OK_AND_ASSIGN(buffer, file->Read(6)); AssertBufferEqual(*buffer, "other "); // Should return the same slice independent of the current position - ASSERT_OK_AND_ASSIGN(buffer, file->ReadAt(2, 3)); + ASSERT_OK_AND_ASSIGN(buffer, file->ReadAt(2, 3, /*allow_short_read=*/true)); AssertBufferEqual(*buffer, "me "); ASSERT_OK_AND_EQ(15, file->GetSize()); ASSERT_OK(file->Close()); - ASSERT_RAISES(Invalid, file->ReadAt(1, 1)); // Stream is closed + ASSERT_RAISES(Invalid, file->ReadAt(1, 1, /*allow_short_read=*/true)); // Stream is closed // Trailing slash rejected ASSERT_RAISES(IOError, fs->OpenInputFile("AB/abc/")); @@ -1183,7 +1183,7 @@ void GenericFileSystemTest::TestOpenInputFileAsync(FileSystem* fs) { std::shared_ptr file; std::shared_ptr buffer; ASSERT_FINISHES_OK_AND_ASSIGN(file, fs->OpenInputFileAsync("AB/abc")); - ASSERT_OK_AND_ASSIGN(buffer, file->ReadAt(5, 6)); + ASSERT_OK_AND_ASSIGN(buffer, file->ReadAt(5, 6, /*allow_short_read=*/true)); AssertBufferEqual(*buffer, "other "); ASSERT_OK(file->Close()); From 8126846461fa6db0b5c68df4ded558e96b661421 Mon Sep 17 00:00:00 2001 From: Udayan_Mahalwar Date: Sun, 31 May 2026 23:26:26 +0530 Subject: [PATCH 4/5] Fix formatting of read function in adapter.cc --- cpp/src/arrow/adapters/orc/adapter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 530668afa57a..691962a6e54f 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -113,7 +113,7 @@ class ArrowInputFile : public liborc::InputStream { uint64_t getNaturalReadSize() const override { return 128 * 1024; } void read(void* buf, uint64_t length, uint64_t offset) override { - ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, /*allow_short_read=*/true , buf)); + ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, /*allow_short_read=*/true, buf)); if (static_cast(bytes_read) != length) { throw liborc::ParseError("Short read from arrow input file"); From 4de0c8787328f2b56061f09a5970cfc2793648be Mon Sep 17 00:00:00 2001 From: Udayan_Mahalwar Date: Sun, 31 May 2026 23:31:25 +0530 Subject: [PATCH 5/5] Fix deprecation message formatting in interfaces.h --- cpp/src/arrow/io/interfaces.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index af4a7eaddde5..e5a04b9fa847 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -295,7 +295,7 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] nbytes The number of bytes to read /// \param[out] out The buffer to read bytes into /// \return The number of bytes read, or an error - ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0 Use signature with allow_short_read instead") virtual Result ReadAt(int64_t position, int64_t nbytes, void* out); /// \brief Read data from given file position. @@ -319,21 +319,21 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] position Where to read bytes from /// \param[in] nbytes The number of bytes to read /// \return A buffer containing the bytes read, or an error - ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0 Use signature with allow_short_read instead") virtual Result> ReadAt(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously. virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes, bool allow_short_read); - ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0 Use signature with allow_short_read instead") virtual Future> ReadAsync(const IOContext&, int64_t position, int64_t nbytes); /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext. Future> ReadAsync(int64_t position, int64_t nbytes, bool allow_short_read); - ARROW_DEPRECATED("Deprecated in 25.0.0. Use signature with allow_short_read instead") + ARROW_DEPRECATED("Deprecated in 25.0.0 Use signature with allow_short_read instead") Future> ReadAsync(int64_t position, int64_t nbytes); /// EXPERIMENTAL: Explicit multi-read.