From 7d69913b273258edd5c5ecfca6c0e060eeea1708 Mon Sep 17 00:00:00 2001 From: Symmetricity <184246+Symmetricity@users.noreply.github.com> Date: Sat, 16 May 2026 18:39:53 +0200 Subject: [PATCH] Decode PBF blobs by data field Blob.raw_size is optional metadata for compressed payloads. The reader was using the absence of raw_size to decide that a blob was raw, so a zlib_data blob without raw_size could be returned to protozero still compressed and surface as invalid_tag_exception while parsing a PrimitiveBlock. Track the actual Blob data field instead. Return raw payloads only when the raw field is present, inflate zlib_data even when raw_size is omitted, and report missing or unsupported blob payloads explicitly. --- src/pbf_reader.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/pbf_reader.cpp b/src/pbf_reader.cpp index ed400a49..d12cfb28 100644 --- a/src/pbf_reader.cpp +++ b/src/pbf_reader.cpp @@ -68,7 +68,10 @@ protozero::data_view PbfReader::PbfReader::readBlob(int32_t datasize, std::istre if (input.eof()) throw std::runtime_error("readBlob: unexpected eof"); + enum class BlobDataType { None, Raw, Zlib }; + int32_t rawSize = -1; + BlobDataType dataType = BlobDataType::None; protozero::data_view view; protozero::pbf_message<Schema::Blob> message{&blobStorage[0], blobStorage.size()}; while (message.next()) { @@ -77,23 +80,34 @@ protozero::data_view PbfReader::PbfReader::readBlob(int32_t datasize, std::istre rawSize = message.get_int32(); break; case Schema::Blob::oneof_data_bytes_raw: + dataType = BlobDataType::Raw; view = message.get_view(); break; case Schema::Blob::oneof_data_bytes_zlib_data: + dataType = BlobDataType::Zlib; view = message.get_view(); break; + case Schema::Blob::oneof_data_bytes_lzma_data: + case Schema::Blob::oneof_data_bytes_lz4_data: + case Schema::Blob::oneof_data_bytes_zstd_data: + throw std::runtime_error("Blob: unsupported compression tag: " + std::to_string(static_cast<uint32_t>(message.tag()))); default: throw 
std::runtime_error("Blob: unknown tag: " + std::to_string(static_cast<uint32_t>(message.tag()))); } } - if (rawSize == -1) - // Data is not compressed, can return it directly. + if (dataType == BlobDataType::None) + throw std::runtime_error("Blob: missing data"); + + if (dataType == BlobDataType::Raw) return view; - blobStorage2.resize(rawSize); + if (rawSize != -1) + blobStorage2.resize(rawSize); + else + blobStorage2.clear(); decompress_string(blobStorage2, view.data(), view.size(), false); - return { &blobStorage2[0], blobStorage2.size() }; + return { blobStorage2.data(), blobStorage2.size() }; } PbfReader::HeaderBBox PbfReader::PbfReader::readHeaderBBox(protozero::data_view data) { @@ -587,4 +601,3 @@ PbfReader::HeaderBlock PbfReader::PbfReader::readHeaderFromFile(std::istream& in return header; } -