From 0ececcbc7728e9d1dcaf375fe9e8c66e278ba64e Mon Sep 17 00:00:00 2001 From: Harshal Patel Date: Fri, 5 Jun 2026 21:56:20 +0530 Subject: [PATCH 1/3] fix: make Contains and ElementsMatch rune-safe for Unicode strings (#1518) --- assert/assertions.go | 27 +++++++++++- assert/assertions_test.go | 86 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/assert/assertions.go b/assert/assertions.go index 1419e4776..e99898642 100644 --- a/assert/assertions.go +++ b/assert/assertions.go @@ -931,7 +931,7 @@ func containsElement(list interface{}, element interface{}) (ok, found bool) { if listKind == reflect.String { elementValue := reflect.ValueOf(element) - return true, strings.Contains(listValue.String(), elementValue.String()) + return true, runeSliceContains([]rune(listValue.String()), []rune(elementValue.String())) } if listKind == reflect.Map { @@ -2312,3 +2312,28 @@ func buildErrorChainString(err error, withType bool) string { } return chain } + +// runeSliceContains reports whether needle appears as a contiguous sub-slice +// of haystack. Comparisons are performed rune-by-rune, so the search is safe +// across multi-byte Unicode boundaries. +func runeSliceContains(haystack, needle []rune) bool { + if len(needle) == 0 { + return true + } + if len(needle) > len(haystack) { + return false + } + for i := 0; i <= len(haystack)-len(needle); i++ { + match := true + for j := 0; j < len(needle); j++ { + if haystack[i+j] != needle[j] { + match = false + break + } + } + if match { + return true + } + } + return false +} diff --git a/assert/assertions_test.go b/assert/assertions_test.go index 11642e096..140378936 100644 --- a/assert/assertions_test.go +++ b/assert/assertions_test.go @@ -4217,3 +4217,89 @@ func TestNotErrorAsWithErrorTooLongToPrint(t *testing.T) { in chain: "long: [0 0 0`) Contains(t, mockT.errorString(), "<... 
truncated>") } + +// TestContainsUnicode verifies that Contains does not falsely match a byte +// fragment that straddles a Unicode rune boundary in the haystack string. +// The old strings.Contains implementation operated on raw bytes, which could +// produce false positives when the needle bytes happened to align with the +// internal encoding of adjacent multi-byte runes. runeSliceContains fixes +// this by working at the rune level. +func TestContainsUnicode(t *testing.T) { + t.Parallel() + mockT := new(testing.T) + + // Each emoji is a 4-byte UTF-8 sequence. + // "🌟" = 0xF0 0x9F 0x8C 0x9F + // "🎉" = 0xF0 0x9F 0x8E 0x89 + // The two-byte sequence "\x9f\x8e" is the second and third bytes of 🎉, + // i.e. a fragment from inside one rune's encoding. strings.Contains would find it; runeSliceContains + // must NOT, because "\x9f\x8e" does not correspond to any rune in the string. + haystack := "🌟🎉" + byteFragment := "\x9f\x8e" // bytes from the middle of 🎉's encoding; not a complete rune + + // Confirm the fragment is NOT a valid rune sequence (sanity check). + // The real assertion: Contains must return false for a byte-only match. + False(t, Contains(mockT, haystack, byteFragment), + "Contains should not match a raw byte fragment that straddles a rune boundary") + + // Positive cases: real rune substrings must still be found. + True(t, Contains(mockT, haystack, "🌟"), + "Contains should find an exact emoji in the string") + True(t, Contains(mockT, haystack, "🎉"), + "Contains should find an exact emoji in the string") + True(t, Contains(mockT, haystack, "🌟🎉"), + "Contains should find the full emoji string") + + // Empty needle is always found. + True(t, Contains(mockT, haystack, ""), + "Contains should find an empty string in any string") + + // ASCII inside a Unicode string still works. 
+ True(t, Contains(mockT, "hello 🌍 world", "world"), + "Contains should find ASCII substring in a Unicode string") + False(t, Contains(mockT, "hello 🌍 world", "earth"), + "Contains should not find absent ASCII substring") + + // Multi-byte CJK characters. + True(t, Contains(mockT, "日本語テスト", "テスト"), + "Contains should find a CJK substring") + False(t, Contains(mockT, "日本語テスト", "中文"), + "Contains should not find absent CJK substring") +} + +// TestElementsMatchUnicode verifies that ElementsMatch correctly compares +// slices whose elements are multi-byte Unicode strings (emojis, CJK, accented +// characters). Because diffLists delegates to ObjectsAreEqual → reflect.DeepEqual +// for whole-string element comparison, these cases already work correctly; this +// test documents and locks in that behaviour. +func TestElementsMatchUnicode(t *testing.T) { + t.Parallel() + mockT := new(testing.T) + + // Emoji elements — order should not matter. + True(t, ElementsMatch(mockT, []string{"🎉", "🌟", "🌍"}, []string{"🌍", "🎉", "🌟"}), + "ElementsMatch should match emoji slices regardless of order") + + // Duplicate emoji elements must also match count-for-count. + True(t, ElementsMatch(mockT, []string{"🎉", "🎉", "🌟"}, []string{"🌟", "🎉", "🎉"}), + "ElementsMatch should respect duplicate emoji counts") + + // Mismatched emoji slices must not match. + False(t, ElementsMatch(mockT, []string{"🎉", "🌟"}, []string{"🎉", "🌍"}), + "ElementsMatch should reject slices with different emoji elements") + + // CJK characters. + True(t, ElementsMatch(mockT, []string{"日本語", "テスト"}, []string{"テスト", "日本語"}), + "ElementsMatch should match CJK string slices regardless of order") + + // Accented Latin characters. + True(t, ElementsMatch(mockT, []string{"café", "naïve", "résumé"}, []string{"résumé", "café", "naïve"}), + "ElementsMatch should match accented Latin strings regardless of order") + + // Mixed ASCII and Unicode. 
+ True(t, ElementsMatch(mockT, + []string{"hello", "🌍", "世界"}, + []string{"世界", "hello", "🌍"}), + "ElementsMatch should match mixed ASCII/Unicode slices") +} + From 288af152880c907aeac892aa853cf9ad94d72653 Mon Sep 17 00:00:00 2001 From: Harshal Patel Date: Fri, 5 Jun 2026 22:10:41 +0530 Subject: [PATCH 2/3] style: run gofmt to fix trailing newline in assertions_test.go --- assert/assertions_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/assert/assertions_test.go b/assert/assertions_test.go index 140378936..474b9d7f6 100644 --- a/assert/assertions_test.go +++ b/assert/assertions_test.go @@ -4302,4 +4302,3 @@ func TestElementsMatchUnicode(t *testing.T) { []string{"世界", "hello", "🌍"}), "ElementsMatch should match mixed ASCII/Unicode slices") } - From 339f113c6ea2b6dae2925b03e6ac72c6b5ca3cea Mon Sep 17 00:00:00 2001 From: Harshal Patel Date: Fri, 5 Jun 2026 22:18:39 +0530 Subject: [PATCH 3/3] perf(assert): add strings.Contains gating fast-path and fix unsafe testing.T instantiation --- assert/assertions.go | 10 +++++++++- assert/assertions_test.go | 11 +++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/assert/assertions.go b/assert/assertions.go index e99898642..6a3be8655 100644 --- a/assert/assertions.go +++ b/assert/assertions.go @@ -931,7 +931,15 @@ func containsElement(list interface{}, element interface{}) (ok, found bool) { if listKind == reflect.String { elementValue := reflect.ValueOf(element) - return true, runeSliceContains([]rune(listValue.String()), []rune(elementValue.String())) + haystack := listValue.String() + needle := elementValue.String() + // Fast-gate: if the bytes aren't present at all, skip the rune + // conversion entirely — no rune boundary can be falsely straddled + // when there are no matching bytes. 
+ if !strings.Contains(haystack, needle) { + return true, false + } + return true, runeSliceContains([]rune(haystack), []rune(needle)) } if listKind == reflect.Map { diff --git a/assert/assertions_test.go b/assert/assertions_test.go index 474b9d7f6..14b02b0ca 100644 --- a/assert/assertions_test.go +++ b/assert/assertions_test.go @@ -4218,15 +4218,10 @@ func TestNotErrorAsWithErrorTooLongToPrint(t *testing.T) { Contains(t, mockT.errorString(), "<... truncated>") } -// TestContainsUnicode verifies that Contains does not falsely match a byte -// fragment that straddles a Unicode rune boundary in the haystack string. -// The old strings.Contains implementation operated on raw bytes, which could -// produce false positives when the needle bytes happened to align with the -// internal encoding of adjacent multi-byte runes. runeSliceContains fixes -// this by working at the rune level. +// TestContainsUnicode verifies that Contains does not match raw byte fragments that are not complete runes. func TestContainsUnicode(t *testing.T) { t.Parallel() - mockT := new(testing.T) + mockT := new(mockTestingT) // Each emoji is a 4-byte UTF-8 sequence. // "🌟" = 0xF0 0x9F 0x8C 0x9F @@ -4274,7 +4269,7 @@ func TestContainsUnicode(t *testing.T) { // test documents and locks in that behaviour. func TestElementsMatchUnicode(t *testing.T) { t.Parallel() - mockT := new(testing.T) + mockT := new(mockTestingT) // Emoji elements — order should not matter. True(t, ElementsMatch(mockT, []string{"🎉", "🌟", "🌍"}, []string{"🌍", "🎉", "🌟"}),