From 9e4d243af557d30bfb8d33dc969b7ed17afef3d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=BB=C3=B3=C5=82kiewski?= Date: Mon, 15 Jun 2026 09:53:10 +0200 Subject: [PATCH 1/5] fix(iOS): rendering numeric entities --- ios/extensions/StringExtension.mm | 69 +++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/ios/extensions/StringExtension.mm b/ios/extensions/StringExtension.mm index a4ad69e5..c6cd1f62 100644 --- a/ios/extensions/StringExtension.mm +++ b/ios/extensions/StringExtension.mm @@ -51,6 +51,75 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { } } + // Numeric character references: &#NNNN; (decimal) and &#xHHHH; (hex) + NSRegularExpression *numericEntityRegex = [NSRegularExpression + regularExpressionWithPattern:@"&#(x[0-9a-fA-F]+|[0-9]+);" + options:0 + error:nil]; + + [numericEntityRegex + enumerateMatchesInString:text + options:0 + range:NSMakeRange(0, text.length) + usingBlock:^(NSTextCheckingResult *match, + NSMatchingFlags flags, BOOL *stop) { + NSRange fullRange = [match range]; + NSString *entityStr = [text substringWithRange:fullRange]; + NSString *valueStr = + [text substringWithRange:[match rangeAtIndex:1]]; + + // Convert the matched string into a raw integer (UTF32 + // Code Point) + UTF32Char codePoint = 0; + if ([valueStr hasPrefix:@"x"] || + [valueStr hasPrefix:@"X"]) { + // Parse Hexadecimal (base 16) + const char *hexStr = + [[valueStr substringFromIndex:1] UTF8String]; + codePoint = (UTF32Char)strtoul(hexStr, NULL, 16); + } else { + // Parse Decimal (base 10) + const char *decStr = [valueStr UTF8String]; + codePoint = (UTF32Char)strtoul(decStr, NULL, 10); + } + + // Safety check: The highest valid Unicode character is + // 0x10FFFF. If the parsed number is larger than this, + // it's invalid/corrupted data. We replace it with the + // standard "Replacement Character" () to prevent crashes. + if (codePoint > 0x10FFFF) { + codePoint = 0xFFFD; + } + + NSString *decoded; + if (codePoint <= 0xFFFF) { + // STANDARD CHARACTER: Fits perfectly in one 16-bit + // unichar. + unichar ch = (unichar)codePoint; + decoded = [NSString stringWithCharacters:&ch length:1]; + } else { + // LARGE CHARACTER: Too big for 16 bits. + // We must split the code point into two 16-bit halves + // (a "Surrogate Pair") so NSString can store it + // properly in UTF-16. + UniChar surrogate[2]; + + // Calculate the "High" surrogate half + surrogate[0] = + (UniChar)(0xD800 + ((codePoint - 0x10000) >> 10)); + + // Calculate the "Low" surrogate half + surrogate[1] = + (UniChar)(0xDC00 + ((codePoint - 0x10000) & 0x3FF)); + + // Create the string using both 16-bit pieces + decoded = [NSString stringWithCharacters:surrogate + length:2]; + } + + results[@(fullRange.location)] = @[ entityStr, decoded ]; + }]; + return results; } From b4493b02c2ddc7e49cb5710b4a8fca7c59ed63f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=BB=C3=B3=C5=82kiewski?= <74975508+kacperzolkiewski@users.noreply.github.com> Date: Mon, 15 Jun 2026 10:02:31 +0200 Subject: [PATCH 2/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- ios/extensions/StringExtension.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ios/extensions/StringExtension.mm b/ios/extensions/StringExtension.mm index c6cd1f62..ab4676b5 100644 --- a/ios/extensions/StringExtension.mm +++ b/ios/extensions/StringExtension.mm @@ -53,7 +53,7 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { // Numeric character references: &#NNNN; (decimal) and &#xHHHH; (hex) NSRegularExpression *numericEntityRegex = [NSRegularExpression - regularExpressionWithPattern:@"&#(x[0-9a-fA-F]+|[0-9]+);" + regularExpressionWithPattern:@"&#([xX][0-9a-fA-F]+|[0-9]+);" options:0 error:nil]; From d7099a76a41472614fb72ebd0ef49a8e18049cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=BB=C3=B3=C5=82kiewski?= <74975508+kacperzolkiewski@users.noreply.github.com> Date: Mon, 15 Jun 2026 10:02:59 +0200 Subject: [PATCH 3/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- ios/extensions/StringExtension.mm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ios/extensions/StringExtension.mm b/ios/extensions/StringExtension.mm index ab4676b5..1075b981 100644 --- a/ios/extensions/StringExtension.mm +++ b/ios/extensions/StringExtension.mm @@ -83,11 +83,11 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { codePoint = (UTF32Char)strtoul(decStr, NULL, 10); } - // Safety check: The highest valid Unicode character is - // 0x10FFFF. If the parsed number is larger than this, - // it's invalid/corrupted data. We replace it with the - // standard "Replacement Character" () to prevent crashes. - if (codePoint > 0x10FFFF) { + // Safety check: Valid Unicode scalar values are 0x1..0x10FFFF, + // excluding surrogate code points (0xD800-0xDFFF). Replace invalid + // values with U+FFFD (Replacement Character) to avoid crashes/truncation. + if (codePoint == 0 || codePoint > 0x10FFFF || + (codePoint >= 0xD800 && codePoint <= 0xDFFF)) { codePoint = 0xFFFD; } From e13fe553cd6b6851680265dd593951781a3a193a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=BB=C3=B3=C5=82kiewski?= Date: Mon, 15 Jun 2026 10:05:05 +0200 Subject: [PATCH 4/5] fix: linter --- ios/extensions/StringExtension.mm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ios/extensions/StringExtension.mm b/ios/extensions/StringExtension.mm index 1075b981..69ab45da 100644 --- a/ios/extensions/StringExtension.mm +++ b/ios/extensions/StringExtension.mm @@ -83,9 +83,10 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { codePoint = (UTF32Char)strtoul(decStr, NULL, 10); } - // Safety check: Valid Unicode scalar values are 0x1..0x10FFFF, - // excluding surrogate code points (0xD800-0xDFFF). Replace invalid - // values with U+FFFD (Replacement Character) to avoid crashes/truncation. + // Safety check: Valid Unicode scalar values are + // 0x1..0x10FFFF, excluding surrogate code points + // (0xD800-0xDFFF). Replace invalid values with U+FFFD + // (Replacement Character) to avoid crashes/truncation. if (codePoint == 0 || codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) { codePoint = 0xFFFD; From 2ff6ef5147ebf93fa8e05d84c2ba2e3da59874c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=BB=C3=B3=C5=82kiewski?= Date: Mon, 15 Jun 2026 10:23:07 +0200 Subject: [PATCH 5/5] fix: handle nil case --- ios/extensions/StringExtension.mm | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ios/extensions/StringExtension.mm b/ios/extensions/StringExtension.mm index 69ab45da..6f0a9d5a 100644 --- a/ios/extensions/StringExtension.mm +++ b/ios/extensions/StringExtension.mm @@ -63,6 +63,9 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { range:NSMakeRange(0, text.length) usingBlock:^(NSTextCheckingResult *match, NSMatchingFlags flags, BOOL *stop) { + if (match == nil) { + return; + } NSRange fullRange = [match range]; NSString *entityStr = [text substringWithRange:fullRange]; NSString *valueStr = @@ -83,10 +86,12 @@ + (NSDictionary *)getEscapedCharactersInfoFrom:(NSString *)text { codePoint = (UTF32Char)strtoul(decStr, NULL, 10); } - // Safety check: Valid Unicode scalar values are - // 0x1..0x10FFFF, excluding surrogate code points - // (0xD800-0xDFFF). Replace invalid values with U+FFFD - // (Replacement Character) to avoid crashes/truncation. + // Safety check: HTML numeric character references should + // map to valid Unicode scalar values (0x0..0x10FFFF), + // excluding surrogate code points (0xD800-0xDFFF). Per + // HTML5, code point 0 is treated as U+FFFD. Replace + // invalid values with U+FFFD (Replacement Character) to + // avoid crashes/truncation. if (codePoint == 0 || codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) { codePoint = 0xFFFD;