From 6440395068dbba4c50f41d0e16eee7d8df3be24f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:34:02 +0000 Subject: [PATCH 1/8] Validate hash string parsing --- merklecpp.h | 8 +++++++- test/coverage.cpp | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/merklecpp.h b/merklecpp.h index 5cc87ac..3af5e4e 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,12 @@ namespace merkle for (size_t i = 0; i < SIZE; i++) { int tmp = 0; - sscanf(s.c_str() + 2 * i, "%02x", &tmp); + int consumed = 0; + if (sscanf(s.c_str() + 2 * i, "%2x%n", &tmp, &consumed) != 1 || + consumed != 2) + { + throw std::runtime_error("invalid hash string"); + } bytes[i] = tmp; } } diff --git a/test/coverage.cpp b/test/coverage.cpp index d431451..dfc426b 100644 --- a/test/coverage.cpp +++ b/test/coverage.cpp @@ -108,6 +108,26 @@ namespace "deserialise_uint64_t should reject short buffers"); } + void test_hash_string_parsing() + { + const std::string valid_hex(64, 'a'); + const merkle::Hash valid_hash(valid_hex); + for (const auto byte : valid_hash.bytes) + { + require(byte == 0xAA, "valid hash string parsed incorrectly"); + } + + require_throws( + [] { merkle::Hash(std::string(64, 'z')); }, + "hash string should reject non-hex digits"); + + std::string partially_invalid(64, '0'); + partially_invalid[1] = 'z'; + require_throws( + [&] { merkle::Hash(partially_invalid); }, + "hash string should reject partially parsed hex bytes"); + } + void test_path_metadata_and_equality() { merkle::Tree tree; @@ -274,6 +294,7 @@ int main() try { test_serialisation_helpers(); + test_hash_string_parsing(); test_path_metadata_and_equality(); test_tree_partial_serialisation_bounds(); test_tree_assignment_and_moves(); From edd108e1a7442be9b97b85f64aba91dc4f119f59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:35:17 +0000 Subject: [PATCH 2/8] Reject partial hash byte parses --- merklecpp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index 3af5e4e..d87d747 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -120,14 +120,14 @@ namespace merkle } for (size_t i = 0; i < SIZE; i++) { - int tmp = 0; - int consumed = 0; - if (sscanf(s.c_str() + 2 * i, "%2x%n", &tmp, &consumed) != 1 || - consumed != 2) + unsigned int tmp = 0; + char extra = 0; + const char byte_string[3] = {s[2 * i], s[2 * i + 1], 0}; + if (sscanf(byte_string, "%2x%c", &tmp, &extra) != 1) { throw std::runtime_error("invalid hash string"); } - bytes[i] = tmp; + bytes[i] = static_cast(tmp); } } From cce8ad0f097fde30a2ce82d4218c1022814fd856 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:38:28 +0000 Subject: [PATCH 3/8] Add hash parse regression coverage --- merklecpp.h | 7 +++++-- test/coverage.cpp | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index d87d747..428d94d 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -121,9 +121,12 @@ namespace merkle for (size_t i = 0; i < SIZE; i++) { unsigned int tmp = 0; - char extra = 0; + char high[2] = {}; + char low[2] = {}; const char byte_string[3] = {s[2 * i], s[2 * i + 1], 0}; - if (sscanf(byte_string, "%2x%c", &tmp, &extra) != 1) + if (sscanf(byte_string, "%1[0-9a-fA-F]%1[0-9a-fA-F]", high, low) != + 2 || + sscanf(byte_string, "%2x", &tmp) != 1) { throw std::runtime_error("invalid hash string"); } diff --git a/test/coverage.cpp b/test/coverage.cpp index dfc426b..4eff7ad 100644 --- a/test/coverage.cpp +++ b/test/coverage.cpp @@ -124,7 +124,7 @@ namespace std::string partially_invalid(64, '0'); partially_invalid[1] = 'z'; require_throws( - [&] { merkle::Hash(partially_invalid); }, + [&] { (void)merkle::Hash(partially_invalid); }, "hash string should reject partially parsed hex bytes"); } From 40d79d14c01e4b22a6a0889ed5118125e2012a71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:40:58 +0000 Subject: [PATCH 4/8] Simplify hash parse validation --- merklecpp.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index 428d94d..59a23d3 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -118,19 +118,29 @@ namespace merkle { throw std::runtime_error("invalid hash string"); } + const auto hex_value = [](char c) { + if (c >= '0' && c <= '9') + { + return static_cast(c - '0'); + } + if (c >= 'a' && c <= 'f') + { + return static_cast(c - 'a' + 10); + } + return static_cast(c - 'A' + 10); + }; for (size_t i = 0; i < SIZE; i++) { - unsigned int tmp = 0; char high[2] = {}; char low[2] = {}; const char byte_string[3] = {s[2 * i], s[2 * i + 1], 0}; if (sscanf(byte_string, "%1[0-9a-fA-F]%1[0-9a-fA-F]", high, low) != - 2 || - sscanf(byte_string, "%2x", &tmp) != 1) + 2) { throw std::runtime_error("invalid hash string"); } - bytes[i] = static_cast(tmp); + bytes[i] = + static_cast((hex_value(high[0]) << 4) | hex_value(low[0])); } } From 219303d3d8f9d192638e9c0ac0d7c2eb589658f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:42:51 +0000 Subject: [PATCH 5/8] Tighten hash digit conversion --- merklecpp.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index 59a23d3..524bbd8 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -127,20 +127,23 @@ namespace merkle { return static_cast(c - 'a' + 10); } - return static_cast(c - 'A' + 10); + if (c >= 'A' && c <= 'F') + { + return static_cast(c - 'A' + 10); + } + throw std::runtime_error("invalid hash string"); }; for (size_t i = 0; i < SIZE; i++) { - char high[2] = {}; - char low[2] = {}; + char hex_byte[3] = {}; const char byte_string[3] = {s[2 * i], s[2 * i + 1], 0}; - if (sscanf(byte_string, "%1[0-9a-fA-F]%1[0-9a-fA-F]", high, low) != - 2) + if (sscanf(byte_string, "%2[0-9a-fA-F]", hex_byte) != 1 || + hex_byte[1] == 0) { throw std::runtime_error("invalid hash string"); } - bytes[i] = - static_cast((hex_value(high[0]) << 4) | hex_value(low[0])); + bytes[i] = static_cast( + (hex_value(hex_byte[0]) << 4) | hex_value(hex_byte[1])); } } From 4107399d5305bc8a9df44ce5a45a6226385909d8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:44:45 +0000 Subject: [PATCH 6/8] Use single hash digit scan --- merklecpp.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index 524bbd8..b472a43 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -118,32 +119,15 @@ namespace merkle { throw std::runtime_error("invalid hash string"); } - const auto hex_value = [](char c) { - if (c >= '0' && c <= '9') - { - return static_cast(c - '0'); - } - if (c >= 'a' && c <= 'f') - { - return static_cast(c - 'a' + 10); - } - if (c >= 'A' && c <= 'F') - { - return static_cast(c - 'A' + 10); - } - throw std::runtime_error("invalid hash string"); - }; for (size_t i = 0; i < SIZE; i++) { char hex_byte[3] = {}; - const char byte_string[3] = {s[2 * i], s[2 * i + 1], 0}; - if (sscanf(byte_string, "%2[0-9a-fA-F]", hex_byte) != 1 || + if (sscanf(s.c_str() + 2 * i, "%2[0-9a-fA-F]", hex_byte) != 1 || hex_byte[1] == 0) { throw std::runtime_error("invalid hash string"); } - bytes[i] = static_cast( - (hex_value(hex_byte[0]) << 4) | hex_value(hex_byte[1])); + bytes[i] = static_cast(std::strtoul(hex_byte, nullptr, 16)); } } From c003a77f12e978a0f980f0c48e9fef053f09e555 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:12:10 +0000 Subject: [PATCH 7/8] Use direct hex parsing --- merklecpp.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/merklecpp.h b/merklecpp.h index b472a43..2ef483a 100644 --- a/merklecpp.h +++ b/merklecpp.h @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -90,6 +88,26 @@ namespace merkle return r; } + static inline bool decode_hex_digit(char c, uint8_t& value) + { + if ('0' <= c && c <= '9') + { + value = static_cast(c - '0'); + return true; + } + if ('a' <= c && c <= 'f') + { + value = static_cast(c - 'a' + 10); + return true; + } + if ('A' <= c && c <= 'F') + { + value = static_cast(c - 'A' + 10); + return true; + } + return false; + } + /// @brief Template for fixed-size hashes /// @tparam SIZE Size of the hash in number of bytes template @@ -121,13 +139,14 @@ namespace merkle } for (size_t i = 0; i < SIZE; i++) { - char hex_byte[3] = {}; - if (sscanf(s.c_str() + 2 * i, "%2[0-9a-fA-F]", hex_byte) != 1 || - hex_byte[1] == 0) + uint8_t high = 0; + uint8_t low = 0; + if (!decode_hex_digit(s[2 * i], high) || + !decode_hex_digit(s[2 * i + 1], low)) { throw std::runtime_error("invalid hash string"); } - bytes[i] = static_cast(std::strtoul(hex_byte, nullptr, 16)); + bytes[i] = static_cast((high << 4) | low); } } From 5f52c96526b65b1b024fc6e9f8f32b35c45251bf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:40:10 +0000 Subject: [PATCH 8/8] Add mixed-case hex parsing coverage --- test/coverage.cpp | 6 ++++++ test/unit_tests.cpp | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/test/coverage.cpp b/test/coverage.cpp index 4eff7ad..ef62271 100644 --- a/test/coverage.cpp +++ b/test/coverage.cpp @@ -117,6 +117,12 @@ namespace require(byte == 0xAA, "valid hash string parsed incorrectly"); } + std::string mixed_case_hex(64, '0'); + mixed_case_hex[0] = 'A'; + mixed_case_hex[1] = 'f'; + const merkle::Hash mixed_case_hash(mixed_case_hex); + require(mixed_case_hash.bytes[0] == 0xAF, "mixed-case hash string parsed incorrectly"); + require_throws( [] { merkle::Hash(std::string(64, 'z')); }, "hash string should reject non-hex digits"); diff --git a/test/unit_tests.cpp b/test/unit_tests.cpp index c6b7fe2..e6f6967 100644 --- a/test/unit_tests.cpp +++ b/test/unit_tests.cpp @@ -38,6 +38,11 @@ TEST_CASE("HashT constructors and error paths") const merkle::Hash h_str(valid_hex); REQUIRE(h_str.bytes[0] == 0xAB); + valid_hex[0] = 'A'; + valid_hex[1] = 'f'; + const merkle::Hash mixed_case_h_str(valid_hex); + REQUIRE(mixed_case_h_str.bytes[0] == 0xAF); + // String constructor: invalid length throws REQUIRE_THROWS(merkle::Hash(std::string(63, '0'))); REQUIRE_THROWS(merkle::Hash(std::string(65, '0')));