Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8651,36 +8651,18 @@ private Task WriteEncodingChar(string s, Encoding encoding, TdsParserStateObject

/// <summary>
/// Encodes <paramref name="numChars"/> characters of <paramref name="s"/>, starting at
/// <paramref name="offset"/>, into a newly allocated byte array.
/// </summary>
/// <param name="s">The string that contains the characters to encode.</param>
/// <param name="numChars">The number of characters to encode.</param>
/// <param name="offset">The index of the first character to encode.</param>
/// <param name="encoding">
/// The encoding to use. When <see langword="null"/>, the parser's default encoding is used.
/// </param>
/// <returns>The encoded bytes for the requested slice of <paramref name="s"/>.</returns>
private byte[] SerializeEncodingChar(string s, int numChars, int offset, Encoding encoding)
{
    // If hitting a 7.0 server, encoding will be null in metadata for columns or return values since
    // 7.0 has no support for multiple code pages in data - single code page support only.
    // The fallback is applied before any use of the encoding so that every target framework
    // takes the same, null-safe path.
    encoding ??= _defaultEncoding;

    // Encode the slice directly rather than copying it into a temporary char[] first.
    // NOTE(review): on .NET Framework this call is expected to bind to the
    // EncodingExtensions.GetBytes(string, int, int) polyfill - confirm it is in scope here.
    return encoding.GetBytes(s, offset, numChars);
}

private Task WriteEncodingChar(string s, int numChars, int offset, Encoding encoding, TdsParserStateObject stateObj, bool canAccumulate = true)
{
// if hitting 7.0 server, encoding will be null in metadata for columns or return values since
// 7.0 has no support for multiple code pages in data - single code page support only
if (encoding == null)
{
encoding = _defaultEncoding;
}
encoding ??= _defaultEncoding;

// Optimization: if the entire string fits in the current buffer, then copy it directly
int bytesLeft = stateObj._outBuff.Length - stateObj._outBytesUsed;
Expand All @@ -8692,23 +8674,14 @@ private Task WriteEncodingChar(string s, int numChars, int offset, Encoding enco
}
else
{
#if NETFRAMEWORK
char[] charData = s.ToCharArray(offset, numChars);
byte[] byteData = encoding.GetBytes(charData, 0, numChars);
Debug.Assert(byteData != null, "no data from encoding");
return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
#else
byte[] byteData = encoding.GetBytes(s, offset, numChars);
Debug.Assert(byteData != null, "no data from encoding");
return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
#endif
}
}

internal int GetEncodingCharLength(string value, int numChars, int charOffset, Encoding encoding)
{
// UNDONE: (PERF) this is an expensive way to get the length. Also, aren't we
// UNDONE: (PERF) going through these steps twice when we write out a value?
if (string.IsNullOrEmpty(value))
{
return 0;
Expand All @@ -8726,9 +8699,7 @@ internal int GetEncodingCharLength(string value, int numChars, int charOffset, E
encoding = _defaultEncoding;
}

char[] charData = value.ToCharArray(charOffset, numChars);

return encoding.GetByteCount(charData, 0, numChars);
return encoding.GetByteCount(value, charOffset, numChars);
}

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length)
}
else
{
char[] chars = value.ToCharArray(offset, length);
bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars);
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
}
SetBytes(0, bytes, 0, bytes.Length);
SetBytesLength(bytes.Length);
Expand All @@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length)
}
else
{
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length));
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
}
_stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj);
_stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info
Expand Down
Comment thread
paulmedynski marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

Comment thread
paulmedynski marked this conversation as resolved.
#if NETFRAMEWORK

using System.Diagnostics;

#nullable enable

namespace System.Text;

/// <summary>
/// Extension methods that let an <see cref="Encoding"/> measure or encode a slice of a string
/// (string, start index, character count) without first copying the slice into a
/// <see cref="T:char[]"/>.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Calculates the number of bytes produced by encoding a slice of a string.
    /// </summary>
    /// <param name="encoding">The encoding used to measure the characters.</param>
    /// <param name="s">The string that contains the characters to measure.</param>
    /// <param name="offset">The index of the first character to measure.</param>
    /// <param name="count">The number of characters to measure.</param>
    /// <returns>The number of bytes the slice encodes to; zero for an empty slice.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> and <paramref name="count"/> do not denote a valid range in
    /// <paramref name="s"/> (raised by the span slicing).
    /// </exception>
    public static int GetByteCount(this Encoding encoding, string? s, int offset, int count)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s));
        }

        // Slicing validates offset/count against the string before any pointer is taken.
        ReadOnlySpan<char> slicedString = s.AsSpan(offset, count);

        // Pinning an empty span yields a null pointer, so answer the empty case directly.
        if (slicedString.Length == 0)
        {
            return 0;
        }

        unsafe
        {
            fixed (char* str = slicedString)
            {
                return encoding.GetByteCount(str, slicedString.Length);
            }
        }
    }

    /// <summary>
    /// Encodes a slice of a string into a newly allocated byte array.
    /// </summary>
    /// <param name="encoding">The encoding used to encode the characters.</param>
    /// <param name="s">The string that contains the characters to encode.</param>
    /// <param name="index">The index of the first character to encode.</param>
    /// <param name="count">The number of characters to encode.</param>
    /// <returns>
    /// The encoded bytes. An empty array is returned when the slice is empty or when the
    /// encoding produces no bytes for it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> and <paramref name="count"/> do not denote a valid range in
    /// <paramref name="s"/> (raised by the span slicing).
    /// </exception>
    public static byte[] GetBytes(this Encoding encoding, string? s, int index, int count)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s));
        }

        // Slicing validates index/count against the string before any pointer is taken.
        ReadOnlySpan<char> slicedString = s.AsSpan(index, count);

        // Pinning an empty span yields a null pointer, so answer the empty case directly.
        if (slicedString.Length == 0)
        {
            return Array.Empty<byte>();
        }

        unsafe
        {
            fixed (char* str = slicedString)
            {
                int byteCount = encoding.GetByteCount(str, slicedString.Length);

                // A non-empty slice can still encode to nothing (for example, when the encoder
                // fallback replaces unmappable characters with an empty string). There is no
                // first element to pin in that case, so return before touching the array.
                if (byteCount == 0)
                {
                    return Array.Empty<byte>();
                }

                byte[] bytes = new byte[byteCount];

                fixed (byte* destArray = bytes)
                {
                    int bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);

                    Debug.Assert(bytesWritten == byteCount);
                    return bytes;
                }
            }
        }
    }
}

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Xunit;

namespace System.Text.UnitTests;

/// <summary>
/// Exercises the (string, index, count) overloads of GetBytes and GetByteCount on
/// <see cref="Encoding"/>, covering both valid slices and invalid arguments.
/// </summary>
/// <remarks>
/// On netfx these calls resolve to our extension polyfills; on netcore they resolve to the
/// built-in methods. Running the same tests against both keeps the polyfill contract aligned
/// with the framework implementation.
/// </remarks>
public class EncodingTest
{
    private const string ExampleStringValue = "ABCDéFG1234567abcdefg";

    /// <summary>
    /// Gets [offset, count] pairs which do not describe a valid slice of the
    /// <see cref="ExampleStringValue"/> constant.
    /// </summary>
    public static TheoryData<int, int> InvalidOffsetsAndCounts =>
        new TheoryData<int, int>
        {
            // Offset lies before the start of the string...
            { -1, 999 },    // ...and the count runs past the end.
            { -1, 5 },      // ...and the count on its own would be valid.

            // Offset is valid...
            { 0, 999 },     // ...but the count runs past the end.
            { 5, -5 },      // ...but the count reaches backwards to the start of the string.

            // Offset lies beyond the end of the string...
            { 999, 999 },   // ...and the count runs even further past the end.
            { 999, -1005 }  // ...and the count reaches backwards into the string.
        };

#if NET
    static EncodingTest()
    {
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    }
#endif

    /// <summary>
    /// A null string passed to GetByteCount results in an ArgumentNullException.
    /// </summary>
    [Fact]
    public void GetByteCount_ThrowsOnNullString()
    {
        // A typed local (rather than a null literal) keeps overload resolution on the string overload.
        string missing = null!;

        Assert.Throws<ArgumentNullException>(() => { Encoding.Unicode.GetByteCount(missing, 0, 0); });
    }

    /// <summary>
    /// A null string passed to GetBytes results in an ArgumentNullException.
    /// </summary>
    [Fact]
    public void GetBytes_ThrowsOnNullString()
    {
        // A typed local (rather than a null literal) keeps overload resolution on the string overload.
        string missing = null!;

        Assert.Throws<ArgumentNullException>(() => { Encoding.Unicode.GetBytes(missing, 0, 0); });
    }

    /// <summary>
    /// GetByteCount throws an ArgumentOutOfRangeException when passed an offset or count
    /// which falls outside of the string.
    /// </summary>
    /// <param name="offset">Value for the offset parameter of GetByteCount.</param>
    /// <param name="count">Value for the count parameter of GetByteCount.</param>
    /// <seealso cref="InvalidOffsetsAndCounts"/>
    [Theory]
    [MemberData(nameof(InvalidOffsetsAndCounts))]
    public void GetByteCount_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
    {
        Assert.Throws<ArgumentOutOfRangeException>(
            () => { Encoding.Unicode.GetByteCount(ExampleStringValue, offset, count); });
    }

    /// <summary>
    /// GetBytes throws an ArgumentOutOfRangeException when passed an offset or count
    /// which falls outside of the string.
    /// </summary>
    /// <param name="offset">Value for the offset parameter of GetBytes.</param>
    /// <param name="count">Value for the count parameter of GetBytes.</param>
    /// <seealso cref="InvalidOffsetsAndCounts"/>
    [Theory]
    [MemberData(nameof(InvalidOffsetsAndCounts))]
    public void GetBytes_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
    {
        Assert.Throws<ArgumentOutOfRangeException>(
            () => { Encoding.Unicode.GetBytes(ExampleStringValue, offset, count); });
    }

    /// <summary>
    /// Encoding the whole string through the (string, index, count) overloads gives the same
    /// byte count and the same bytes as GetByteCount(string) and GetBytes(string).
    /// </summary>
    [Fact]
    public void GetBytesOfFullStringByLength_MatchesGetBytesOfFullString()
    {
        Encoding utf16 = Encoding.Unicode;
        int length = ExampleStringValue.Length;

        byte[] expectedBytes = utf16.GetBytes(ExampleStringValue);
        int expectedCount = utf16.GetByteCount(ExampleStringValue);

        byte[] slicedBytes = utf16.GetBytes(ExampleStringValue, 0, length);
        int slicedCount = utf16.GetByteCount(ExampleStringValue, 0, length);

        Assert.Equal(expectedCount, slicedCount);
        Assert.Equal(expectedCount, slicedBytes.Length);
        Assert.Equal(expectedBytes, slicedBytes);
    }

    /// <summary>
    /// Encoding a substring and decoding the resulting bytes yields the original substring,
    /// for each of the supplied code pages.
    /// </summary>
    /// <param name="codePage">Identifier of the code page used to encode and decode.</param>
    [Theory]
    [InlineData(1200)]  // Unicode
    [InlineData(65001)] // UTF8
    public void GetBytes_Roundtrips(int codePage)
    {
        const int start = 4;
        const int length = 5;
        Encoding encoding = Encoding.GetEncoding(codePage);

        byte[] encoded = encoding.GetBytes(ExampleStringValue, start, length);
        string decoded = encoding.GetString(encoded);

        Assert.Equal(ExampleStringValue.Substring(start, length), decoded);
    }

    /// <summary>
    /// GetByteCount reports the correct number of bytes, per encoding, for a slice that
    /// contains a character which is multibyte in some encodings.
    /// </summary>
    [Fact]
    public void GetByteCount_ReturnsCorrectValueOnMultiCharacterRune()
    {
        const int start = 4;
        const int length = 5;

        // UTF8 needs two bytes for é and one byte for each of the other four characters.
        int utf8Count = Encoding.UTF8.GetByteCount(ExampleStringValue, start, length);
        Assert.Equal(6, utf8Count);

        // Every character in the sample string takes two bytes in Unicode.
        int utf16Count = Encoding.Unicode.GetByteCount(ExampleStringValue, start, length);
        Assert.Equal(10, utf16Count);

        // Code page 1251 does not have the é character, so treats it as the single-byte character "e".
        int cp1251Count = Encoding.GetEncoding(1251).GetByteCount(ExampleStringValue, start, length);
        Assert.Equal(5, cp1251Count);
    }
}
Loading