diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 3690d72abb2343..33dca94cf45d88 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1226,26 +1226,42 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl // is beyond the limit. int stride = abs(iterInfo->IterConst()); + // For arrays the per-access cloning condition only bounds `limit` by + // Array.MaxLength (0x7FFFFFC7), which leaves room for the post-step IV + // up to `limit + s - 1` to fit in INT_MAX as long as `s <= 57`. Larger + // strides need an explicit overflow guard, same shape as the one used + // for spans (where Span<>.Length can reach INT_MAX even at small s). static_assert(INT32_MAX >= CORINFO_Array_MaxLength); - if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1)) + const bool largeStride = (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1)); + const bool needsOverflowGuard = hasSpans || largeStride; + + // If the loop limit is an array length, compute the underlying ArrIndex + // and queue the deref check once up front. The optional zero-trip guard, + // the optional overflow guard, and the regular limit conditions all + // reuse this single ArrIndex. + // + ArrIndex* limitArrIndex = nullptr; + if (iterInfo->HasArrayLengthLimit) { - // Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure - // the stride increment doesn't overflow or underflow the index. Hence, - // the maximum stride limit is set to - // (int.MaxValue - (Array.MaxLength - 1) + 1), which is - // (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58. 
- return false; + limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); + if (!iterInfo->ArrLenLimit(this, limitArrIndex)) + { + JITDUMP("> ArrLen not matching\n"); + return false; + } + + LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } - // Span<>.Length can be INT32_MAX, unlike Array.MaxLength. For an - // increasing loop with stride > 1, the IV after the final in-loop - // increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so - // a limit near INT32_MAX would wrap the IV and let the bounds-check- - // stripped fast clone access memory past the span. Bound the limit - // base accordingly. Decreasing loops are safe via the existing - // `limit >= 0` condition plus the stride cap above. HasArrayLengthLimit - // is bounded implicitly by Array.MaxLength. - if (hasSpans && (stride > 1) && isIncreasingLoop) + // For an increasing loop with stride > 1, the IV after the final in-loop + // increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so a + // limit near INT32_MAX would wrap the IV and let the bounds-check- + // stripped fast clone access memory past the array/span. Bound the limit + // base accordingly. Decreasing loops are safe via the existing `limit + // >= 0` condition (post-step IV >= -stride > INT_MIN for any non-absurd + // stride). + if ((stride > 1) && isIncreasingLoop && needsOverflowGuard) { const int adjustForLE = (iterInfo->TestOper() == GT_LE) ? 
1 : 0; const int offset = iterInfo->LimitOffset; @@ -1257,7 +1273,7 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl const int limitVal = iterInfo->ConstLimit(); if ((int64_t)limitVal > maxLimitBase64) { - JITDUMP("> Span stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal, + JITDUMP("> Stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal, (long long)maxLimitBase64); return false; } @@ -1266,12 +1282,11 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl { if (maxLimitBase64 >= INT32_MAX) { - // Offset already absorbs the stride; guard is vacuous. - JITDUMP("Span stride>1 overflow guard trivially holds (offset %d)\n", offset); + JITDUMP("Stride>1 overflow guard trivially holds (offset %d)\n", offset); } else if (maxLimitBase64 < 0) { - JITDUMP("> Span stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset); + JITDUMP("> Stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset); return false; } else @@ -1279,7 +1294,7 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl const unsigned limitLcl = iterInfo->VarLimit(); if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { - JITDUMP("> Span stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl); + JITDUMP("> Stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl); return false; } @@ -1288,29 +1303,36 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast(maxLimit)); LC_Condition overflowGuard(GT_LE, LC_Expr(limitVarIdent), LC_Expr(maxConstIdent)); context->EnsureConditions(loop->GetIndex())->Push(overflowGuard); - JITDUMP("Added Span stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit); + JITDUMP("Added stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit); } } - // HasArrayLengthLimit: bounded by Array.MaxLength, no extra 
guard. - } - - // If the loop limit is an array length, compute the underlying ArrIndex - // and queue the deref check once up front. Both the optional zero-trip - // guard below and the regular limit conditions further down reuse this - // single ArrIndex to avoid duplicating the deref entry and allocation. - // - ArrIndex* limitArrIndex = nullptr; - if (iterInfo->HasArrayLengthLimit) - { - limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!iterInfo->ArrLenLimit(this, limitArrIndex)) + else if (iterInfo->HasArrayLengthLimit && largeStride) { - JITDUMP("> ArrLen not matching\n"); - return false; + // For stride <= 57 the implicit Array.MaxLength bound suffices; + // we fall through with no extra check. For wider strides emit a + // runtime guard on arr.Length so the fast clone only runs when + // the array is short enough that the post-step IV stays in int. + assert(limitArrIndex != nullptr); + if (maxLimitBase64 >= CORINFO_Array_MaxLength) + { + JITDUMP("Stride>1 overflow guard trivially holds for arr.Length (offset %d)\n", offset); + } + else if (maxLimitBase64 < 0) + { + JITDUMP("> Stride %d, offset %d: arr.Length overflow guard unsatisfiable\n", stride, offset); + return false; + } + else + { + const int maxLimit = (int)maxLimitBase64; + LC_Ident arrLenIdent = + LC_Ident::CreateArrAccess(LC_Array(LC_Array::Jagged, limitArrIndex, LC_Array::ArrLen)); + LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast(maxLimit)); + LC_Condition overflowGuard(GT_LE, LC_Expr(arrLenIdent), LC_Expr(maxConstIdent)); + context->EnsureConditions(loop->GetIndex())->Push(overflowGuard); + JITDUMP("Added stride>1 arr.Length overflow guard: <= %d\n", maxLimit); + } } - - LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None); - context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } // If AnalyzeIteration could not prove the loop condition holds on entry, diff --git a/src/tests/JIT/opt/Cloning/LargeStride.cs 
b/src/tests/JIT/opt/Cloning/LargeStride.cs new file mode 100644 index 00000000000000..1f0a8ba6476962 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/LargeStride.cs @@ -0,0 +1,127 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public class LargeStride +{ + [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)] + static int ArrayLengthStride60(int[] a) + { + int sum = 0; + for (int i = 0; i < a.Length; i += 60) + sum += a[i]; + return sum; + } + + [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)] + static int ArrayLengthStride256(int[] a) + { + int sum = 0; + for (int i = 0; i < a.Length; i += 256) + sum += a[i]; + return sum; + } + + [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)] + static int VarLimitStride100(int[] a, int n) + { + int sum = 0; + for (int i = 0; i < n; i += 100) + sum += a[i]; + return sum; + } + + [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)] + static int ConstLimitStride200(int[] a) + { + int sum = 0; + for (int i = 0; i < 1000; i += 200) + sum += a[i]; + return sum; + } + + [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)] + static int DecGtStride100(int[] a, int n) + { + int sum = 0; + for (int i = n; i > 0; i -= 100) + sum += a[i]; + return sum; + } + + static int[] Make(int n) + { + int[] a = new int[n]; + for (int i = 0; i < n; i++) a[i] = i + 1; + return a; + } + + static int ExpectedIncLt(int n, int stride) + { + int sum = 0; + for (int i = 0; i < n; i += stride) sum += i + 1; + return sum; + } + + static int ExpectedDecGt(int n, int stride) + { + int sum = 0; + for (int i = n; i > 0; i -= stride) sum += i + 1; + return sum; + } + + [Theory] + [InlineData(0)] + [InlineData(59)] + [InlineData(60)] + 
[InlineData(120)] + [InlineData(300)] + public static void ArrayLengthStride60Test(int n) + { + int[] a = Make(n); + Assert.Equal(ExpectedIncLt(n, 60), ArrayLengthStride60(a)); + } + + [Theory] + [InlineData(0)] + [InlineData(255)] + [InlineData(256)] + [InlineData(1000)] + public static void ArrayLengthStride256Test(int n) + { + int[] a = Make(n); + Assert.Equal(ExpectedIncLt(n, 256), ArrayLengthStride256(a)); + } + + [Theory] + [InlineData(0, 1000)] + [InlineData(99, 1000)] + [InlineData(100, 1000)] + [InlineData(500, 1000)] + public static void VarLimitStride100Test(int n, int len) + { + int[] a = Make(len); + Assert.Equal(ExpectedIncLt(n, 100), VarLimitStride100(a, n)); + } + + [Fact] + public static void ConstLimitStride200Test() + { + int[] a = Make(1000); + Assert.Equal(ExpectedIncLt(1000, 200), ConstLimitStride200(a)); + } + + [Theory] + [InlineData(1, 1000)] + [InlineData(99, 1000)] + [InlineData(100, 1000)] + [InlineData(500, 1000)] + public static void DecGtStride100Test(int n, int len) + { + int[] a = Make(len); + Assert.Equal(ExpectedDecGt(n, 100), DecGtStride100(a, n)); + } +} diff --git a/src/tests/JIT/opt/Cloning/LargeStride.csproj b/src/tests/JIT/opt/Cloning/LargeStride.csproj new file mode 100644 index 00000000000000..de6d5e08882e86 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/LargeStride.csproj @@ -0,0 +1,8 @@ + + + True + + + + +