Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/json/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,11 @@ class JSON_API CharReaderBuilder : public CharReader::Factory {
* - `"allowSpecialFloats": false or true`
* - If true, special float values (NaNs and infinities) are allowed and
* their values can be restored without loss.
* - `"rejectInvalidSurrogates": false or true`
* - If true, `parse()` returns false when a `\u` escape is a lone or
* mismatched UTF-16 surrogate half (a high surrogate not followed by a
* low surrogate, or a low surrogate with no preceding high surrogate).
* - Disabled by ecma404Mode(), which treats such escapes as conforming.
* - `"skipBom": false or true`
* - If true, a Unicode byte order mark (BOM) at the start of the input is
* skipped.
Expand Down
17 changes: 17 additions & 0 deletions src/lib_json/json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,7 @@ class OurFeatures {
bool failIfExtra_;
bool rejectDupKeys_;
bool allowSpecialFloats_;
bool rejectInvalidSurrogates_;
bool skipBom_;
size_t stackLimit_;
}; // OurFeatures
Expand Down Expand Up @@ -1759,13 +1760,23 @@ bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
if (*(current++) == '\\' && *(current++) == 'u') {
unsigned int surrogatePair;
if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
if (features_.rejectInvalidSurrogates_ &&
(surrogatePair < 0xDC00 || surrogatePair > 0xDFFF))
return addError("expecting a low surrogate (DC00-DFFF) to complete "
"the unicode surrogate pair",
token, current);
unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
} else
return false;
} else
return addError("expecting another \\u token to begin the second half of "
"a unicode surrogate pair",
token, current);
} else if (features_.rejectInvalidSurrogates_ && unicode >= 0xDC00 &&
unicode <= 0xDFFF) {
return addError("unexpected low surrogate (DC00-DFFF); a high surrogate "
"(D800-DBFF) must come first",
token, current);
}
return true;
}
Expand Down Expand Up @@ -1940,6 +1951,8 @@ CharReader* CharReaderBuilder::newCharReader() const {
features.failIfExtra_ = settings_["failIfExtra"].asBool();
features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
features.rejectInvalidSurrogates_ =
settings_["rejectInvalidSurrogates"].asBool();
features.skipBom_ = settings_["skipBom"].asBool();
return new OurCharReader(collectComments, features);
}
Expand All @@ -1957,6 +1970,7 @@ bool CharReaderBuilder::validate(Json::Value* invalid) const {
"failIfExtra",
"rejectDupKeys",
"allowSpecialFloats",
"rejectInvalidSurrogates",
"skipBom",
};
for (auto si = settings_.begin(); si != settings_.end(); ++si) {
Expand Down Expand Up @@ -1987,6 +2001,7 @@ void CharReaderBuilder::strictMode(Json::Value* settings) {
(*settings)["failIfExtra"] = true;
(*settings)["rejectDupKeys"] = true;
(*settings)["allowSpecialFloats"] = false;
(*settings)["rejectInvalidSurrogates"] = true;
(*settings)["skipBom"] = true;
//! [CharReaderBuilderStrictMode]
}
Expand All @@ -2004,6 +2019,7 @@ void CharReaderBuilder::setDefaults(Json::Value* settings) {
(*settings)["failIfExtra"] = false;
(*settings)["rejectDupKeys"] = false;
(*settings)["allowSpecialFloats"] = false;
(*settings)["rejectInvalidSurrogates"] = true;
(*settings)["skipBom"] = true;
//! [CharReaderBuilderDefaults]
}
Expand All @@ -2020,6 +2036,7 @@ void CharReaderBuilder::ecma404Mode(Json::Value* settings) {
(*settings)["failIfExtra"] = true;
(*settings)["rejectDupKeys"] = false;
(*settings)["allowSpecialFloats"] = false;
(*settings)["rejectInvalidSurrogates"] = false;
(*settings)["skipBom"] = false;
//! [CharReaderBuilderECMA404Mode]
}
Expand Down
29 changes: 29 additions & 0 deletions src/test_lib_json/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3322,6 +3322,35 @@ JSONTEST_FIXTURE_LOCAL(CharReaderTest, parseString) {
"second half of a unicode surrogate pair\n"
"See Line 1, Column 12 for detail.\n");
}
{
char const doc[] = R"([ "\uD801\u0041" ])";

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No tests for the legacy reader change?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dropped the legacy Reader change entirely, so there's no behavior change left there to test. It has no Features knob to gate this on, and keeping it lenient avoids breaking input that parsed before. The validation now lives only in OurReader behind the rejectInvalidSurrogates setting.

bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);
JSONTEST_ASSERT(!ok);
JSONTEST_ASSERT(errs == "* Line 1, Column 3\n"
" expecting a low surrogate (DC00-DFFF) to "
"complete the unicode surrogate pair\n"
"See Line 1, Column 16 for detail.\n");
}
{
char const doc[] = R"([ "\uDC00" ])";
bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);
JSONTEST_ASSERT(!ok);
JSONTEST_ASSERT(errs == "* Line 1, Column 3\n"
" unexpected low surrogate (DC00-DFFF); a high "
"surrogate (D800-DBFF) must come first\n"
"See Line 1, Column 10 for detail.\n");
}
{
// The escape hatch: with rejectInvalidSurrogates off, the lenient path
// keeps the pre-existing behaviour of passing lone surrogates through.
Json::CharReaderBuilder lenient;
lenient["rejectInvalidSurrogates"] = false;
CharReaderPtr lenientReader(lenient.newCharReader());
char const doc[] = R"([ "\uDC00" ])";
bool ok = lenientReader->parse(doc, doc + std::strlen(doc), &root, &errs);
JSONTEST_ASSERT(ok);
JSONTEST_ASSERT(errs.empty());
}
{
char const doc[] = R"([ "\ua3t@" ])";
bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);
Expand Down
Loading