open-source-parsers · SABITHSAHEB · Jun 17, 2026 · Jul 1, 2026 · Jul 4, 2026 · baylesj
diff --git a/include/json/reader.h b/include/json/reader.h
@@ -352,6 +352,14 @@ class JSON_API CharReaderBuilder : public CharReader::Factory {
    * - `"allowSpecialFloats": false or true`
    *   - If true, special float values (NaNs and infinities) are allowed and
    *     their values are lossfree restorable.
+   * - `"rejectInvalidSurrogates": false or true`
+   *   - If true, `parse()` returns false when a `\u` escape is a lone or
+   *     mismatched UTF-16 surrogate half (a high surrogate not followed by a
+   *     low surrogate, or a low surrogate with no preceding high surrogate).
+   *   - A high surrogate that is not followed by another `\u` escape is
+   *     rejected regardless of this setting.
+   *   - Enabled by setDefaults() and strictMode(); disabled by
+   *     ecma404Mode(), which treats such escapes as conforming.
    * - `"skipBom": false or true`
    *   - If true, if the input starts with the Unicode byte order mark (BOM),
    *     it is skipped.

diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp
@@ -861,6 +861,7 @@ class OurFeatures {
   bool failIfExtra_;
   bool rejectDupKeys_;
   bool allowSpecialFloats_;
+  bool rejectInvalidSurrogates_;
   bool skipBom_;
   size_t stackLimit_;
 }; // OurFeatures
@@ -1759,13 +1760,23 @@ bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
     if (*(current++) == '\\' && *(current++) == 'u') {
       unsigned int surrogatePair;
       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
+        if (features_.rejectInvalidSurrogates_ &&
+            (surrogatePair < 0xDC00 || surrogatePair > 0xDFFF))
+          return addError("expecting a low surrogate (DC00-DFFF) to complete "
+                          "the unicode surrogate pair",
+                          token, current);
         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
       } else
         return false;
     } else
       return addError("expecting another \\u token to begin the second half of "
                       "a unicode surrogate pair",
                       token, current);
+  } else if (features_.rejectInvalidSurrogates_ && unicode >= 0xDC00 &&
+             unicode <= 0xDFFF) {
+    return addError("unexpected low surrogate (DC00-DFFF); a high surrogate "
+                    "(D800-DBFF) must come first",
+                    token, current);
   }
   return true;
 }
@@ -1940,6 +1951,8 @@ CharReader* CharReaderBuilder::newCharReader() const {
   features.failIfExtra_ = settings_["failIfExtra"].asBool();
   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
+  features.rejectInvalidSurrogates_ =
+      settings_["rejectInvalidSurrogates"].asBool();
   features.skipBom_ = settings_["skipBom"].asBool();
   return new OurCharReader(collectComments, features);
 }
@@ -1957,6 +1970,7 @@ bool CharReaderBuilder::validate(Json::Value* invalid) const {
       "failIfExtra",
       "rejectDupKeys",
       "allowSpecialFloats",
+      "rejectInvalidSurrogates",
       "skipBom",
   };
   for (auto si = settings_.begin(); si != settings_.end(); ++si) {
@@ -1987,6 +2001,7 @@ void CharReaderBuilder::strictMode(Json::Value* settings) {
   (*settings)["failIfExtra"] = true;
   (*settings)["rejectDupKeys"] = true;
   (*settings)["allowSpecialFloats"] = false;
+  (*settings)["rejectInvalidSurrogates"] = true;
   (*settings)["skipBom"] = true;
   //! [CharReaderBuilderStrictMode]
 }
@@ -2004,6 +2019,7 @@ void CharReaderBuilder::setDefaults(Json::Value* settings) {
   (*settings)["failIfExtra"] = false;
   (*settings)["rejectDupKeys"] = false;
   (*settings)["allowSpecialFloats"] = false;
+  (*settings)["rejectInvalidSurrogates"] = true;
   (*settings)["skipBom"] = true;
   //! [CharReaderBuilderDefaults]
 }
@@ -2020,6 +2036,7 @@ void CharReaderBuilder::ecma404Mode(Json::Value* settings) {
   (*settings)["failIfExtra"] = true;
   (*settings)["rejectDupKeys"] = false;
   (*settings)["allowSpecialFloats"] = false;
+  (*settings)["rejectInvalidSurrogates"] = false;
   (*settings)["skipBom"] = false;
   //! [CharReaderBuilderECMA404Mode]
 }

diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp
@@ -3322,6 +3322,35 @@ JSONTEST_FIXTURE_LOCAL(CharReaderTest, parseString) {
                             "second half of a unicode surrogate pair\n"
                             "See Line 1, Column 12 for detail.\n");
   }
+  {
+    char const doc[] = R"([ "\uD801\u0041" ])";
+    bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);
+    JSONTEST_ASSERT(!ok);
+    JSONTEST_ASSERT(errs == "* Line 1, Column 3\n"
+                            "  expecting a low surrogate (DC00-DFFF) to "
+                            "complete the unicode surrogate pair\n"
+                            "See Line 1, Column 16 for detail.\n");
+  }
+  {
+    char const doc[] = R"([ "\uDC00" ])";
+    bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);
+    JSONTEST_ASSERT(!ok);
+    JSONTEST_ASSERT(errs == "* Line 1, Column 3\n"
+                            "  unexpected low surrogate (DC00-DFFF); a high "
+                            "surrogate (D800-DBFF) must come first\n"
+                            "See Line 1, Column 10 for detail.\n");
+  }
+  {
+    // The escape hatch: with rejectInvalidSurrogates off, the reader keeps
+    // the pre-existing behaviour of accepting a lone low surrogate escape.
+    Json::CharReaderBuilder lenient;
+    lenient["rejectInvalidSurrogates"] = false;
+    CharReaderPtr lenientReader(lenient.newCharReader());
+    char const doc[] = R"([ "\uDC00" ])";
+    bool ok = lenientReader->parse(doc, doc + std::strlen(doc), &root, &errs);
+    JSONTEST_ASSERT(ok);
+    JSONTEST_ASSERT(errs.empty());
+  }
   {
     char const doc[] = R"([ "\ua3t@" ])";
     bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);