diff --git a/tests/test_api.py b/tests/test_api.py index b49c90a..9defd60 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -511,6 +511,10 @@ def test_create_super_table_with_aot() -> None: ({"multiline": True}, 'My""String', '"""My""String"""'), ({"multiline": True}, 'My"""String', '"""My""\\"String"""'), ({"multiline": True}, 'My""""String', '"""My""\\""String"""'), + # a lone CR is a control char and is escaped, but a CRLF line ending is + # left intact + ({"multiline": True}, "My\rString", '"""My\\rString"""'), + ({"multiline": True}, "My\r\nString", '"""My\r\nString"""'), ( {"multiline": True}, '"""My"""Str"""ing"""', @@ -548,6 +552,9 @@ def test_create_string(kwargs: dict[str, Any], example: str, expected: str) -> N ({"literal": True}, "My\x0cString"), ({"literal": True}, "My\x7fString"), ({"multiline": True, "literal": True}, "My'''String"), + # a lone CR cannot be represented in a multiline literal string + ({"multiline": True, "literal": True}, "My\rString"), + ({"multiline": True, "literal": True}, "My\r\nMy\rString"), ], ) def test_create_string_with_invalid_characters( diff --git a/tomlkit/items.py b/tomlkit/items.py index 6cbb29a..8b4d6c6 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -232,6 +232,12 @@ def item(value: Any, _parent: Item | None = None, _sort_keys: bool = False) -> I raise ConvertError(f"Unable to convert an object of {type(value)} to a TOML item") +# A carriage return that is not the CR of a CRLF line ending. Inside a multiline +# string only a raw line feed or a CRLF pair is a valid line ending; a lone CR is +# a control character. Literal strings cannot escape it, basic strings must. +_BARE_CR = re.compile(r"\r(?!\n)") + + class StringType(Enum): # Single Line Basic SLB = '"' @@ -2239,9 +2245,15 @@ def from_raw( if any(c in value for c in invalid): raise InvalidStringError(value, invalid, type_.value) + if type_ is StringType.MLL and _BARE_CR.search(value): + raise InvalidStringError(value, {"\r"}, type_.value) + escaped = type_.escaped_sequences string_value = escape_string(value, escaped) if escape and escaped else value + if type_ is StringType.MLB and escape: + string_value = _BARE_CR.sub(r"\\r", string_value) + return cls(type_, decode(value), string_value, Trivia())