Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions cpp/src/parquet/decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1000,8 +1000,9 @@ class DictDecoderImpl : public TypedDecoderImpl<Type>, public DictDecoder<Type>

inline void DecodeDict(TypedDecoder<Type>* dictionary) {
dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T),
/*shrink_to_fit=*/false));
PARQUET_THROW_NOT_OK(
dictionary_->Resize(static_cast<int64_t>(dictionary_length_) * sizeof(T),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that sizeof(T) is already a size_t, so this one would only make a difference on 32-bit systems.

Copy link
Contributor

@emkornfield emkornfield Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would need to trace through the code some more, but is FLBA handled in a different code path (I assume with strings or decimals explicitly)? I.e., would sizeof(T) be the wrong size here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we just change dictionary_length_ to int64_t? (Note that we seem to explicitly cast it to int32_t right above.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that, but it would introduce some downcasts to int/int32_t in other places. Upcasts are safer, so I think it's better to keep it an int32_t.

/*shrink_to_fit=*/false));
dictionary->Decode(dictionary_->mutable_data_as<T>(), dictionary_length_);
}

Expand Down Expand Up @@ -1044,15 +1045,15 @@ void DictDecoderImpl<ByteArrayType>::SetDict(TypedDecoder<ByteArrayType>* dictio

auto* dict_values = dictionary_->mutable_data_as<ByteArray>();

int total_size = 0;
int64_t total_size = 0;
for (int i = 0; i < dictionary_length_; ++i) {
total_size += dict_values[i].len;
}
PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
/*shrink_to_fit=*/false));
PARQUET_THROW_NOT_OK(
byte_array_offsets_->Resize((dictionary_length_ + 1) * sizeof(int32_t),
/*shrink_to_fit=*/false));
PARQUET_THROW_NOT_OK(byte_array_offsets_->Resize(
(static_cast<int64_t>(dictionary_length_) + 1) * sizeof(int32_t),
/*shrink_to_fit=*/false));

int32_t offset = 0;
uint8_t* bytes_data = byte_array_data_->mutable_data();
Expand All @@ -1073,7 +1074,7 @@ inline void DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>* dictionar
auto* dict_values = dictionary_->mutable_data_as<FLBA>();

int fixed_len = this->type_length_;
int total_size = dictionary_length_ * fixed_len;
int64_t total_size = static_cast<int64_t>(dictionary_length_) * fixed_len;

PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
/*shrink_to_fit=*/false));
Expand Down
Loading