diff --git a/NativeScript/runtime/ArgConverter.mm b/NativeScript/runtime/ArgConverter.mm index bd6956c6..16600f96 100644 --- a/NativeScript/runtime/ArgConverter.mm +++ b/NativeScript/runtime/ArgConverter.mm @@ -294,7 +294,8 @@ } else if (value->IsString()) { if (type == BinaryTypeEncodingType::IdEncoding || type == BinaryTypeEncodingType::InterfaceDeclarationReference) { - id data = tns::ToNSString(isolate, value); + std::u16string strValue = tns::ToUtf16String(isolate, value); + id data = [NSString stringWithCharacters:(const unichar*)strValue.data() length:strValue.size()]; // this feels wrong but follows the other CFBridgingRetain calls // and also solves a leak auto ref = CFBridgingRetain(data); @@ -929,8 +930,8 @@ } if ([obj isKindOfClass:[NSString class]]) { - const char* str = [obj UTF8String]; - args.GetReturnValue().Set(tns::ToV8String(isolate, str)); + NSString* nativeStr = (NSString*)obj; + args.GetReturnValue().Set(tns::ToV8String(isolate, nativeStr)); return; } diff --git a/NativeScript/runtime/DictionaryAdapter.mm b/NativeScript/runtime/DictionaryAdapter.mm index f6e85ce6..1dbd2c48 100644 --- a/NativeScript/runtime/DictionaryAdapter.mm +++ b/NativeScript/runtime/DictionaryAdapter.mm @@ -48,7 +48,8 @@ - (id)nextObject { bool success = array->Get(context, self->index_).ToLocal(&key); tns::Assert(success, isolate); self->index_ += 2; - NSString* result = tns::ToNSString(isolate, key); + std::u16string keyStr = tns::ToUtf16String(isolate, key); + NSString* result = [NSString stringWithCharacters:(const unichar*)keyStr.data() length:keyStr.length()]; return result; } @@ -116,8 +117,8 @@ - (id)nextObject { bool success = properties->Get(context, (uint)self->index_).ToLocal(&value); tns::Assert(success, isolate); self->index_++; - std::string result = tns::ToString(isolate, value); - return [NSString stringWithUTF8String:result.c_str()]; + std::u16string result = tns::ToUtf16String(isolate, value); + return [NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]; } return nil; @@ -139,8 +140,8 @@ - (NSArray*)allObjects { Local value; bool success = properties->Get(context, i).ToLocal(&value); tns::Assert(success, isolate); - std::string result = tns::ToString(isolate, value); - [array addObject:[NSString stringWithUTF8String:result.c_str()]]; + std::u16string result = tns::ToUtf16String(isolate, value); + [array addObject:[NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]]; } return array; @@ -214,7 +215,7 @@ - (id)objectForKey:(id)aKey { bool success = obj->Get(context, key).ToLocal(&value); tns::Assert(success, isolate); } else if ([aKey isKindOfClass:[NSString class]]) { - const char* key = [aKey UTF8String]; + NSString* key = (NSString*)aKey; Local keyV8Str = tns::ToV8String(isolate, key); if (obj->IsMap()) { diff --git a/NativeScript/runtime/Helpers.mm b/NativeScript/runtime/Helpers.mm index d3174360..6d3cab9d 100644 --- a/NativeScript/runtime/Helpers.mm +++ b/NativeScript/runtime/Helpers.mm @@ -24,13 +24,26 @@ } // namespace std::u16string tns::ToUtf16String(Isolate* isolate, const Local& value) { - std::string valueStr = tns::ToString(isolate, value); -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - // FIXME: std::codecvt_utf8_utf16 is deprecated - std::wstring_convert, char16_t> convert; - std::u16string value16 = convert.from_bytes(valueStr); + // Read the V8 string's native UTF-16 buffer directly instead of round-tripping + // through UTF-8, which corrupts lone surrogates (replaced with U+FFFD) and is + // slower. This also drops the deprecated std::codecvt_utf8_utf16. + if (value.IsEmpty()) { + return std::u16string(); + } + + if (value->IsStringObject()) { + Local obj = value.As()->ValueOf(); + return tns::ToUtf16String(isolate, obj); + } + + v8::String::Value result(isolate, value); + + uint16_t* val = *result; + if (val == nullptr) { + return std::u16string(); + } - return value16; + return std::u16string((char16_t*)val, result.length()); } std::vector tns::ToVector(const std::string& value) { diff --git a/NativeScript/runtime/Interop.mm b/NativeScript/runtime/Interop.mm index 23bc4647..f84292c3 100644 --- a/NativeScript/runtime/Interop.mm +++ b/NativeScript/runtime/Interop.mm @@ -324,7 +324,8 @@ inline bool isBool() { } else if (argHelper.isString() && (typeEncoding->type == BinaryTypeEncodingType::InterfaceDeclarationReference || typeEncoding->type == BinaryTypeEncodingType::IdEncoding)) { - NSString* result = tns::ToNSString(isolate, arg); + std::u16string str = tns::ToUtf16String(isolate, arg); + NSString* result = [NSString stringWithCharacters:(const unichar*)str.data() length:str.size()]; Interop::SetValue(dest, result); } else if (Interop::IsNumbericType(typeEncoding->type) || tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); @@ -686,7 +687,8 @@ inline bool isBool() { if (arg.IsEmpty() || arg->IsNullOrUndefined()) { return nil; } else if (tns::IsString(arg)) { - NSString* result = tns::ToNSString(isolate, arg); + std::u16string value = tns::ToUtf16String(isolate, arg); + NSString* result = [NSString stringWithCharacters:(const unichar*)value.data() length:value.size()]; return result; } else if (tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); diff --git a/TestRunner/app/tests/ApiTests.js b/TestRunner/app/tests/ApiTests.js index 84834d17..d4c144df 100644 --- a/TestRunner/app/tests/ApiTests.js +++ b/TestRunner/app/tests/ApiTests.js @@ -12,6 +12,39 @@ describe(module.id, function () { expect(object.hash).toBe(3); }); + it("preserves a lone high surrogate when bridging a JS string to NSString", function () { + // A lone high surrogate (U+D834, range U+D800-U+DBFF) is a valid JS string + // code unit but has no UTF-8 encoding. The old UTF-8 round-trip replaced it + // with U+FFFD; faithful UTF-16 bridging keeps it. Read the code unit straight + // out of the bridged string's UTF-16 buffer as a number: reading it back as a + // JS string would re-corrupt a lone surrogate, and converting it to UTF-8 to + // measure it is not reliable across OS versions. + var ns = NSString.stringWithString("\uD834"); + expect(ns.length).toBe(1); + + var buffer = interop.alloc(interop.sizeof(interop.types.uint16)); + ns.getCharactersRange(buffer, NSMakeRange(0, 1)); + var codeUnit = new interop.Reference(interop.types.uint16, buffer).value; + interop.free(buffer); + + expect(codeUnit).toBe(0xD834); // 0xFFFD (65533) after a lossy UTF-8 round-trip + }); + + it("preserves a lone low surrogate when bridging a JS string to NSString", function () { + // The low surrogate range (U+DC00-U+DFFF) is a different bit pattern that also + // has no UTF-8 encoding and must survive the bridge intact; observed the same + // way as the high-surrogate case above. + var ns = NSString.stringWithString("\uDC00"); + expect(ns.length).toBe(1); + + var buffer = interop.alloc(interop.sizeof(interop.types.uint16)); + ns.getCharactersRange(buffer, NSMakeRange(0, 1)); + var codeUnit = new interop.Reference(interop.types.uint16, buffer).value; + interop.free(buffer); + + expect(codeUnit).toBe(0xDC00); // 0xFFFD (65533) after a lossy UTF-8 round-trip + }); + it("NSArray from native (uncached) array access", function () { const res = TNSObjCTypes.new().getNSArrayOfNSURLs(); console.log(res);