Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/project/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ Improvements

* Added wheels for ARMv7, PowerPC, RISC-V, and S/390.

Bug fixes
.........

* Prevented an exception when logging a text frame that splits a multi-byte
UTF-8 sequence across fragments.

.. _16.0:

16.0
Expand Down
30 changes: 17 additions & 13 deletions src/websockets/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ class Frame:
# Configure if you want to see more in logs. Should be a multiple of 3.
MAX_LOG_SIZE = int(os.environ.get("WEBSOCKETS_MAX_LOG_SIZE", "75"))

def _format_binary(self) -> str:
# We'll show at most the first 16 bytes and the last 8 bytes.
# Encode just what we need, plus two dummy bytes to elide later.
binary = self.data
if len(binary) > self.MAX_LOG_SIZE // 3:
cut = (self.MAX_LOG_SIZE // 3 - 1) // 3 # by default cut = 8
binary = b"".join([binary[: 2 * cut], b"\x00\x00", binary[-cut:]])
return " ".join(f"{byte:02x}" for byte in binary)

def __str__(self) -> str:
"""
Return a human-readable representation of a frame.
Expand All @@ -159,15 +168,14 @@ def __str__(self) -> str:
if self.opcode is OP_TEXT:
# Decoding only the beginning and the end is needlessly hard.
# Decode the entire payload then elide later if necessary.
data = repr(bytes(self.data).decode())
# Fragmentation may split a multi-byte UTF-8 sequence; fall back to
# a binary representation when the payload doesn't decode cleanly.
try:
data = repr(bytes(self.data).decode())
except UnicodeDecodeError:
data = self._format_binary()
elif self.opcode is OP_BINARY:
# We'll show at most the first 16 bytes and the last 8 bytes.
# Encode just what we need, plus two dummy bytes to elide later.
binary = self.data
if len(binary) > self.MAX_LOG_SIZE // 3:
cut = (self.MAX_LOG_SIZE // 3 - 1) // 3 # by default cut = 8
binary = b"".join([binary[: 2 * cut], b"\x00\x00", binary[-cut:]])
data = " ".join(f"{byte:02x}" for byte in binary)
data = self._format_binary()
elif self.opcode is OP_CLOSE:
data = str(Close.parse(self.data))
elif self.data:
Expand All @@ -180,11 +188,7 @@ def __str__(self) -> str:
data = repr(bytes(self.data).decode())
coding = "text"
except (UnicodeDecodeError, AttributeError):
binary = self.data
if len(binary) > self.MAX_LOG_SIZE // 3:
cut = (self.MAX_LOG_SIZE // 3 - 1) // 3 # by default cut = 8
binary = b"".join([binary[: 2 * cut], b"\x00\x00", binary[-cut:]])
data = " ".join(f"{byte:02x}" for byte in binary)
data = self._format_binary()
coding = "binary"
else:
data = "''"
Expand Down
13 changes: 13 additions & 0 deletions tests/test_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,19 @@ def test_text_with_newline(self):
"TEXT 'Hello\\nworld!' [12 bytes]",
)

def test_text_fragment_with_partial_utf8(self):
    # The payload ends with 0xC3, the lead byte of a two-byte UTF-8
    # sequence whose continuation byte is missing, so it cannot be decoded
    # as text; the expected rendering is the hexadecimal form.
    frame = Frame(OP_TEXT, b" cr\xc3", fin=False)
    expected = "TEXT 20 63 72 c3 [4 bytes, continued]"
    self.assertEqual(str(frame), expected)

def test_text_fragment_with_partial_utf8_truncated(self):
    # 16 repetitions of a 6-byte UTF-8 string plus a dangling 0xC3 lead
    # byte: 97 bytes that do not decode as text and are long enough for
    # the hexadecimal rendering to be elided in the middle.
    payload = "café ".encode() * 16 + b"\xc3"
    expected = (
        "TEXT 63 61 66 c3 a9 20 63 61 66 c3 a9 20 63 61 66 c3 ..."
        " 20 63 61 66 c3 a9 20 c3 [97 bytes, continued]"
    )
    self.assertEqual(str(Frame(OP_TEXT, payload, fin=False)), expected)

def test_binary(self):
self.assertEqual(
str(Frame(OP_BINARY, b"\x00\x01\x02\x03")),
Expand Down