Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Lib/test/test_json/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,42 @@ def test_object_pairs_hook_with_unicode(self):
object_hook = lambda x: None),
OrderedDict(p))

def test_ensure_ascii_false_long_string_paths(self):
# Cover the SWAR scan in _json escape_size(): it inspects eight bytes
# per iteration, so exercise runs that cross the 8-byte windows and the
# short-string guard with a special character at every offset.
dumps, loads = self.dumps, self.loads

def is_optimized(s):
# The no-escape fast path returns the string verbatim in quotes.
self.assertEqual(dumps(s, ensure_ascii=False), f'"{s}"')

# Bytes that are kept as-is, including Latin-1 and 0x7f, stay verbatim.
for s in ("abc", "\xe9", "kept latin1 \xe9\xff \x7f text"):
is_optimized(s)
is_optimized(s * 8)

def need_escape(s, expected):
encoded = dumps(s, ensure_ascii=False)
self.assertEqual(encoded, expected)
self.assertEqual(loads(encoded), s)

tail = "tail"
for n in range(40):
run = "a" * n
for char, escaped in (('"', '\\"'), ("\\", "\\\\"), ("\n", "\\n"),
("\x00", "\\u0000"), ("\x1f", "\\u001f")):
need_escape(run + char + tail, f'"{run}{escaped}{tail}"')
for char in ("\x7f", "\xe9", "中", "\U0001f600"):
s = run + char + tail
need_escape(s, f'"{s}"')

# Structural escapes and control characters are still escaped after a
# long no-escape run.
base = "a" * 20
for char, escaped in (('"', '\\"'), ("\\", "\\\\"), ("\x01", "\\u0001")):
need_escape(base + char, f'"{base}{escaped}"')


class TestPyUnicode(TestUnicode, PyTest):
    # Binds the shared TestUnicode cases to the PyTest base class
    # (presumably the pure-Python json implementation; confirm in test_json).
    pass
class TestCUnicode(TestUnicode, CTest):
    # Binds the shared TestUnicode cases to the CTest base class
    # (presumably the C-accelerated _json implementation; confirm in test_json).
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Speed up :func:`json.dumps` with ``ensure_ascii=False`` for strings made up of
long runs of characters that need no escaping, by scanning eight bytes at a
time (roughly 1.5x faster for long ASCII or Latin-1 strings). Strings shorter
than eight characters, strings that need escaping, and strings with characters
above U+00FF are unaffected. Patch by Bernát Gábor.
51 changes: 51 additions & 0 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,57 @@ escape_size(const void *input, int kind, Py_ssize_t input_chars)
Py_ssize_t i;
Py_ssize_t output_size;

/* SWAR no-escape fast path (1-byte): in this 1-byte (Latin-1) mode a code
point needs escaping only when c == '"', c == '\\', or c < 0x20; any other
byte, including non-ASCII (>= 0x80), is copied verbatim. Scan eight bytes
per iteration and drop to the per-character loop at the first byte that
needs escaping. The loop reads one 8-byte word at a time, so strings
shorter than a word stay on that per-character loop, where the setup
below would not pay off. */
if (kind == PyUnicode_1BYTE_KIND && input_chars >= 8
/* the output is input_chars + 2 (the surrounding quotes); keep that
addition below from overflowing Py_ssize_t */
&& input_chars < PY_SSIZE_T_MAX - 2) {
Comment thread
gaborbernat marked this conversation as resolved.
const Py_UCS1 *p = (const Py_UCS1 *)input;
const uint64_t ones = 0x0101010101010101ULL; /* 1 in every byte lane */
const uint64_t high = 0x8080808080808080ULL; /* high bit of every lane */
const uint64_t bq = 0x22ULL * ones; /* '"' broadcast to all 8 lanes */
const uint64_t bs = 0x5cULL * ones; /* '\\' broadcast to all 8 lanes */
const uint64_t bc = 0xE0ULL * ones; /* 0xE0 per lane; w & bc is zero in
a lane exactly when its byte is
< 0x20 (top three bits clear) */
Py_ssize_t j = 0;
int needs_escape = 0;
for (; j + 8 <= input_chars; j += 8) {
uint64_t w;
memcpy(&w, p + j, 8);
/* (v - ones) & ~v & high is nonzero exactly when some lane of v is
zero. A borrow can also light lanes above the first zero lane, so the
masks are only tested for being nonzero, never read lane by lane. */
uint64_t mq = w ^ bq;
mq = (mq - ones) & ~mq & high; /* lanes equal to '"' */
uint64_t ms = w ^ bs;
ms = (ms - ones) & ~ms & high; /* lanes equal to '\\' */
uint64_t vc = w & bc;
uint64_t mlo = (vc - ones) & ~vc & high; /* lanes < 0x20 */
if (mq | ms | mlo) {
needs_escape = 1;
break;
}
}
if (!needs_escape) {
for (; j < input_chars; j++) {
Py_UCS1 c = p[j];
if (c == '"' || c == '\\' || c < 0x20) {
needs_escape = 1;
break;
}
}
}
if (!needs_escape) {
return input_chars + 2;
}
}

/* Compute the output size */
for (i = 0, output_size = 2; i < input_chars; i++) {
Py_UCS4 c = PyUnicode_READ(kind, input, i);
Expand Down
Loading