diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 0ebc1d6a008b77..9d8f38e91ad4a1 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -4312,6 +4312,50 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
                     BlockRange().Remove(cast);
                 }
             }
+#ifdef TARGET_XARCH
+            else if ((castToType == TYP_BYTE) && FitsIn<INT8>(op2Value))
+            {
+                //
+                // Mirror the TYP_UBYTE case above for signed byte casts. Removing the cast lets
+                // codegen emit a single byte-sized `cmp` (e.g. `cmp cl, 0xC0`) instead of first
+                // sign-extending the operand with `movsx`. The compare stays signed because
+                // TYP_BYTE is a signed small type, so LowerCompare's small-unsigned promotion
+                // does not apply.
+                //
+                // Bail out for `x < 0` / `x >= 0` against a zero constant: codegen has a
+                // sign-bit-shift optimization (`mov + shr`) that assumes the operand is
+                // sign-extended to the full register width. After narrowing the operand to
+                // TYP_BYTE, the register no longer carries that sign extension and the shift
+                // amount would be wrong (it uses `emitActualTypeSize` which is still 4 for
+                // TYP_BYTE). Letting the cast survive keeps that path correct.
+                //
+                const bool isSignBitTest = (op2Value == 0) && cmp->OperIs(GT_LT, GT_GE);
+                bool removeCast = !isSignBitTest && (castOp->OperIs(GT_LCL_VAR, GT_CALL, GT_OR, GT_XOR, GT_AND) ||
+                                                     IsContainableMemoryOp(castOp));
+
+                if (removeCast)
+                {
+                    assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
+
+                    castOp->gtType = castToType;
+                    op2->gtType = castToType;
+
+                    // If we have any contained memory ops on castOp, they must now not be contained.
+                    castOp->ClearContained();
+
+                    if (castOp->OperIs(GT_OR, GT_XOR, GT_AND))
+                    {
+                        castOp->gtGetOp1()->ClearContained();
+                        castOp->gtGetOp2()->ClearContained();
+                        ContainCheckBinary(castOp->AsOp());
+                    }
+
+                    cmp->AsOp()->gtOp1 = castOp;
+
+                    BlockRange().Remove(cast);
+                }
+            }
+#endif // TARGET_XARCH
         }
         else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
         {
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_10337/Runtime_10337.cs b/src/tests/JIT/Regression/JitBlue/Runtime_10337/Runtime_10337.cs
new file mode 100644
index 00000000000000..eb94a55ae1dbfb
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_10337/Runtime_10337.cs
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Runtime_10337;
+
+using System;
+using System.Runtime.CompilerServices;
+using Xunit;
+
+// Tests for the Lowering::OptimizeConstCompare TYP_BYTE narrowing path.
+// Each method exercises a compare against a constant that fits in INT8 with
+// an operand whose effective type is TYP_BYTE (the result of `(sbyte)x`).
+// The narrowing must produce the same result as the canonical
+// sign-extend-and-compare path for every input.
+
+public class Runtime_10337
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Lt_M64(byte x) => ((sbyte)x) < -64;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Le_M64(byte x) => ((sbyte)x) <= -64;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Gt_M64(byte x) => ((sbyte)x) > -64;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Ge_M64(byte x) => ((sbyte)x) >= -64;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Eq_M1(byte x) => ((sbyte)x) == -1;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Ne_M1(byte x) => ((sbyte)x) != -1;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Lt_Max(byte x) => ((sbyte)x) < 127;
+
+    // op2 == 0 with LT/GE must keep using the sign-bit-shift codegen.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Lt_Zero(byte x) => ((sbyte)x) < 0;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Ge_Zero(byte x) => ((sbyte)x) >= 0;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Eq_Zero(byte x) => ((sbyte)x) == 0; // zero constant, but EQ rather than LT/GE
+
+    // Int source so the cast is doing real truncation, not just reinterpretation.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Lt_IntSrc(int x) => ((sbyte)x) < -64;
+
+    // CAST(BYTE) over AND -- exercises the OR/XOR/AND narrowing branch.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static bool Lt_And(int x) => ((sbyte)(x & 0xF0)) < -16;
+
+    // Memory operand: the comparison should contain the load.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static unsafe bool Lt_Mem(byte* p) => ((sbyte)*p) < -64;
+
+    [Fact]
+    public static int TestEntryPoint()
+    {
+        bool ok = true; // stays true only while every helper agrees with the inline expectation
+
+        for (int x = 0; x < 256; x++) // covers every possible byte value
+        {
+            byte b = (byte)x;
+            sbyte sb = (sbyte)b; // expected operand: the same bits reinterpreted as signed
+
+            ok &= Lt_M64(b) == (sb < -64);
+            ok &= Le_M64(b) == (sb <= -64);
+            ok &= Gt_M64(b) == (sb > -64);
+            ok &= Ge_M64(b) == (sb >= -64);
+            ok &= Eq_M1(b) == (sb == -1);
+            ok &= Ne_M1(b) == (sb != -1);
+            ok &= Lt_Max(b) == (sb < 127);
+            ok &= Lt_Zero(b) == (sb < 0);
+            ok &= Ge_Zero(b) == (sb >= 0);
+            ok &= Eq_Zero(b) == (sb == 0);
+
+            // Wider int sources, including patterns with garbage in the
+            // upper bits so we exercise truncation semantics.
+            int[] ints = { x, x ^ unchecked((int)0xFFFFFF00), (x << 8) | x, ~x };
+            foreach (int xs in ints)
+            {
+                ok &= Lt_IntSrc(xs) == (((sbyte)xs) < -64);
+                ok &= Lt_And(xs) == (((sbyte)(xs & 0xF0)) < -16);
+            }
+
+            unsafe
+            {
+                byte bb = b; // local copy whose address is passed to Lt_Mem
+                ok &= Lt_Mem(&bb) == (sb < -64);
+            }
+        }
+
+        return ok ? 100 : 1; // 100 only if every comparison above matched, 1 otherwise
+    }
+}
diff --git a/src/tests/JIT/Regression/Regression_ro_2.csproj b/src/tests/JIT/Regression/Regression_ro_2.csproj
index d4d6bd2ba5d72e..bba2c75d0bcd0e 100644
--- a/src/tests/JIT/Regression/Regression_ro_2.csproj
+++ b/src/tests/JIT/Regression/Regression_ro_2.csproj
@@ -102,6 +102,7 @@
+