diff --git a/src/core/idna/codegen.py b/src/core/idna/codegen.py index a203874e7..65c64fff3 100644 --- a/src/core/idna/codegen.py +++ b/src/core/idna/codegen.py @@ -11,7 +11,7 @@ PAGE_SIZE = 1 << PAGE_SHIFT NUM_PAGES = TOTAL_CODEPOINTS // PAGE_SIZE -# Integer values must match the IdnaProperty enum in idna_ucd.h. +# Integer values must match the IDNAProperty enum in idna_ucd.h. IDNA_PROPERTY_VALUES = { "PVALID": 0, "CONTEXTJ": 1, diff --git a/src/core/idna/idna.cc b/src/core/idna/idna.cc index d9f932966..d0db13af7 100644 --- a/src/core/idna/idna.cc +++ b/src/core/idna/idna.cc @@ -149,7 +149,6 @@ auto idna_passes_contextj(const std::u32string_view label, return true; } -// TODO: Reject labels that are not NFC-normalized auto idna_is_valid_u_label(const std::u32string_view label) noexcept -> bool { if (label.empty()) { return false; @@ -257,7 +256,9 @@ auto idna_passes_bidi_rule(const std::u32string_view label) noexcept -> bool { } } - // RFC 5893 §2 condition 4: an RTL label cannot have both EN and AN. + // RFC 5893 §2 condition 4 (RTL labels only): cannot have both EN and AN. + // The "and vice versa" wording in the RFC is internal to the RTL branch + // (EN/AN exclusivity within RTL), not symmetry to LTR. if (is_rtl_label && has_european_number && has_arabic_number) { return false; } diff --git a/src/core/idna/include/sourcemeta/core/idna.h b/src/core/idna/include/sourcemeta/core/idna.h index d6f2c7406..8f5fdf30e 100644 --- a/src/core/idna/include/sourcemeta/core/idna.h +++ b/src/core/idna/include/sourcemeta/core/idna.h @@ -41,8 +41,9 @@ auto idna_property(const char32_t codepoint) noexcept -> IDNAProperty; /// @ingroup idna /// Return whether the codepoint at `position` within `label` does not /// violate any RFC 5892 Appendix A.3-A.9 contextual rule. Returns true -/// vacuously when the codepoint has no such rule. See -/// https://www.rfc-editor.org/rfc/rfc5892#appendix-A for the rules. +/// vacuously when the codepoint has no such rule. Returns false when +/// `position` is out of range, treated as a precondition violation. +/// See https://www.rfc-editor.org/rfc/rfc5892#appendix-A for the rules. /// For example: /// /// ```cpp @@ -59,8 +60,9 @@ auto idna_passes_contexto(const std::u32string_view label, /// @ingroup idna /// Return whether the codepoint at `position` within `label` does not /// violate any RFC 5892 Appendix A.1 / A.2 contextual rule. Returns true -/// vacuously when the codepoint has no such rule. See -/// https://www.rfc-editor.org/rfc/rfc5892#appendix-A for the rules. +/// vacuously when the codepoint has no such rule. Returns false when +/// `position` is out of range, treated as a precondition violation. +/// See https://www.rfc-editor.org/rfc/rfc5892#appendix-A for the rules. /// For example: /// /// ```cpp @@ -102,7 +104,9 @@ auto idna_passes_bidi_rule(const std::u32string_view label) noexcept -> bool; /// Return whether the given label is a valid U-label per RFC 5891 §4. See /// https://www.rfc-editor.org/rfc/rfc5891#section-4 for the criteria. /// The Bidi rule is not checked here because Bidi domain detection is a -/// property of the whole domain, not of a single label. For example: +/// property of the whole domain, not of a single label. Callers needing +/// strict conformance must normalise the input to NFC themselves. The +/// RFC 5891 §4.1.2.A NFC requirement is not enforced. For example: /// /// ```cpp /// #include