Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 62 additions & 40 deletions src/coreclr/jit/loopcloning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1226,26 +1226,42 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
// is beyond the limit.
int stride = abs(iterInfo->IterConst());

// For arrays the per-access cloning condition only bounds `limit` by
// Array.MaxLength (0x7FFFFFC7), which leaves room for the post-step IV
// up to `limit + s - 1` to fit in INT_MAX as long as `s <= 57`. Strides
// of 58 or more need an explicit overflow guard, same shape as the one
// used for spans (where Span<>.Length can reach INT_MAX even at small s).
Comment on lines +1229 to +1233
static_assert(INT32_MAX >= CORINFO_Array_MaxLength);
if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1))
const bool largeStride = (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1));
const bool needsOverflowGuard = hasSpans || largeStride;

// If the loop limit is an array length, compute the underlying ArrIndex
// and queue the deref check once up front. The optional zero-trip guard,
// the optional overflow guard, and the regular limit conditions all
// reuse this single ArrIndex.
//
ArrIndex* limitArrIndex = nullptr;
if (iterInfo->HasArrayLengthLimit)
{
// Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure
// the stride increment doesn't overflow or underflow the index. Hence,
// the maximum stride limit is set to
// (int.MaxValue - (Array.MaxLength - 1) + 1), which is
// (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58.
return false;
limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone));
if (!iterInfo->ArrLenLimit(this, limitArrIndex))
{
JITDUMP("> ArrLen not matching\n");
return false;
}

LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None);
context->EnsureArrayDerefs(loop->GetIndex())->Push(array);
}

// Span<>.Length can be INT32_MAX, unlike Array.MaxLength. For an
// increasing loop with stride > 1, the IV after the final in-loop
// increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so
// a limit near INT32_MAX would wrap the IV and let the bounds-check-
// stripped fast clone access memory past the span. Bound the limit
// base accordingly. Decreasing loops are safe via the existing
// `limit >= 0` condition plus the stride cap above. HasArrayLengthLimit
// is bounded implicitly by Array.MaxLength.
if (hasSpans && (stride > 1) && isIncreasingLoop)
// For an increasing loop with stride > 1, the IV after the final in-loop
// increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so a
// limit near INT32_MAX would wrap the IV and let the bounds-check-
// stripped fast clone access memory past the array/span. Bound the limit
// base accordingly. Decreasing loops are safe via the existing `limit
// >= 0` condition (post-step IV >= -stride > INT_MIN for any non-absurd
// stride).
if ((stride > 1) && isIncreasingLoop && needsOverflowGuard)
{
const int adjustForLE = (iterInfo->TestOper() == GT_LE) ? 1 : 0;
const int offset = iterInfo->LimitOffset;
Expand All @@ -1257,7 +1273,7 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
const int limitVal = iterInfo->ConstLimit();
if ((int64_t)limitVal > maxLimitBase64)
{
JITDUMP("> Span stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal,
JITDUMP("> Stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal,
(long long)maxLimitBase64);
return false;
}
Expand All @@ -1266,20 +1282,19 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
{
if (maxLimitBase64 >= INT32_MAX)
{
// Offset already absorbs the stride; guard is vacuous.
JITDUMP("Span stride>1 overflow guard trivially holds (offset %d)\n", offset);
JITDUMP("Stride>1 overflow guard trivially holds (offset %d)\n", offset);
}
else if (maxLimitBase64 < 0)
{
JITDUMP("> Span stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset);
JITDUMP("> Stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset);
return false;
}
else
{
const unsigned limitLcl = iterInfo->VarLimit();
if (!genActualTypeIsInt(lvaGetDesc(limitLcl)))
{
JITDUMP("> Span stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl);
JITDUMP("> Stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl);
return false;
}

Expand All @@ -1288,29 +1303,36 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast<unsigned>(maxLimit));
LC_Condition overflowGuard(GT_LE, LC_Expr(limitVarIdent), LC_Expr(maxConstIdent));
context->EnsureConditions(loop->GetIndex())->Push(overflowGuard);
JITDUMP("Added Span stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit);
JITDUMP("Added stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit);
}
}
// HasArrayLengthLimit: bounded by Array.MaxLength, no extra guard.
}

// If the loop limit is an array length, compute the underlying ArrIndex
// and queue the deref check once up front. Both the optional zero-trip
// guard below and the regular limit conditions further down reuse this
// single ArrIndex to avoid duplicating the deref entry and allocation.
//
ArrIndex* limitArrIndex = nullptr;
if (iterInfo->HasArrayLengthLimit)
{
limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone));
if (!iterInfo->ArrLenLimit(this, limitArrIndex))
else if (iterInfo->HasArrayLengthLimit && largeStride)
{
JITDUMP("> ArrLen not matching\n");
return false;
// For stride <= 57 the implicit Array.MaxLength bound suffices;
// we fall through with no extra check. For wider strides emit a
// runtime guard on arr.Length so the fast clone only runs when
// the array is short enough that the post-step IV stays in int.
assert(limitArrIndex != nullptr);
if (maxLimitBase64 >= CORINFO_Array_MaxLength)
{
JITDUMP("Stride>1 overflow guard trivially holds for arr.Length (offset %d)\n", offset);
}
else if (maxLimitBase64 < 0)
{
JITDUMP("> Stride %d, offset %d: arr.Length overflow guard unsatisfiable\n", stride, offset);
return false;
}
else
{
const int maxLimit = (int)maxLimitBase64;
LC_Ident arrLenIdent =
LC_Ident::CreateArrAccess(LC_Array(LC_Array::Jagged, limitArrIndex, LC_Array::ArrLen));
LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast<unsigned>(maxLimit));
LC_Condition overflowGuard(GT_LE, LC_Expr(arrLenIdent), LC_Expr(maxConstIdent));
context->EnsureConditions(loop->GetIndex())->Push(overflowGuard);
JITDUMP("Added stride>1 arr.Length overflow guard: <= %d\n", maxLimit);
}
}

LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None);
context->EnsureArrayDerefs(loop->GetIndex())->Push(array);
}

// If AnalyzeIteration could not prove the loop condition holds on entry,
Expand Down
127 changes: 127 additions & 0 deletions src/tests/JIT/opt/Cloning/LargeStride.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using Xunit;

// Exercises counted loops over int[] whose induction variable steps by a
// large constant stride (60, 100, 200, 256). Each kernel is checked against
// a straightforward reference sum computed without touching the array.
// NOTE(review): the loop shapes (start value, compare operator, stride, and
// the a[i] access) appear to be the patterns under test, so keep them exactly
// as written when editing this file.
public class LargeStride
{
// Sums a[0], a[60], a[120], ... while the index is below a.Length.
// NoInlining keeps the loop in its own method body; AggressiveOptimization
// requests fully optimized code for it.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
static int ArrayLengthStride60(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length; i += 60)
sum += a[i];
return sum;
}

// Same shape as ArrayLengthStride60, with a stride of 256.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
static int ArrayLengthStride256(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length; i += 256)
sum += a[i];
return sum;
}

// Sums a[0], a[100], a[200], ... while the index is below the caller-supplied
// limit n (a local variable limit rather than a.Length). Every visited index
// must be a valid index into a.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
static int VarLimitStride100(int[] a, int n)
{
int sum = 0;
for (int i = 0; i < n; i += 100)
sum += a[i];
return sum;
}

// Sums a[0], a[200], ..., a[800]: the limit is the constant 1000, so a must
// have at least 801 elements.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
static int ConstLimitStride200(int[] a)
{
int sum = 0;
for (int i = 0; i < 1000; i += 200)
sum += a[i];
return sum;
}

// Decreasing loop: sums a[n], a[n - 100], ... while the index is greater
// than zero. The first access is a[n], so n must be a valid index into a.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
static int DecGtStride100(int[] a, int n)
{
int sum = 0;
for (int i = n; i > 0; i -= 100)
sum += a[i];
return sum;
}

// Builds an array of length n with a[i] == i + 1, so the value read at any
// index can be predicted from the index alone.
static int[] Make(int n)
{
int[] a = new int[n];
for (int i = 0; i < n; i++) a[i] = i + 1;
return a;
}

// Reference result for the increasing `i < n` kernels: the sum of (i + 1)
// over i = 0, stride, 2 * stride, ... below n, i.e. the values Make() stores
// at those indices.
static int ExpectedIncLt(int n, int stride)
{
int sum = 0;
for (int i = 0; i < n; i += stride) sum += i + 1;
return sum;
}

// Reference result for the decreasing `i > 0` kernel: the sum of (i + 1)
// over i = n, n - stride, ... while i is positive.
static int ExpectedDecGt(int n, int stride)
{
int sum = 0;
for (int i = n; i > 0; i -= stride) sum += i + 1;
return sum;
}

// Array lengths cover the zero-trip case (0), one element short of the
// stride (59), exactly one stride (60), and multiples of it (120, 300).
[Theory]
[InlineData(0)]
[InlineData(59)]
[InlineData(60)]
[InlineData(120)]
[InlineData(300)]
public static void ArrayLengthStride60Test(int n)
{
int[] a = Make(n);
Assert.Equal(ExpectedIncLt(n, 60), ArrayLengthStride60(a));
}

// Array lengths cover the zero-trip case, the values on either side of one
// stride (255, 256), and a length spanning several strides (1000).
[Theory]
[InlineData(0)]
[InlineData(255)]
[InlineData(256)]
[InlineData(1000)]
public static void ArrayLengthStride256Test(int n)
{
int[] a = Make(n);
Assert.Equal(ExpectedIncLt(n, 256), ArrayLengthStride256(a));
}

// n is the loop limit and len the array length; len (1000) exceeds every n
// used here, so all accesses stay in bounds.
[Theory]
[InlineData(0, 1000)]
[InlineData(99, 1000)]
[InlineData(100, 1000)]
[InlineData(500, 1000)]
public static void VarLimitStride100Test(int n, int len)
{
int[] a = Make(len);
Assert.Equal(ExpectedIncLt(n, 100), VarLimitStride100(a, n));
}

// The kernel's limit is the constant 1000, so the array is made with exactly
// 1000 elements and the reference uses the same limit and stride.
[Fact]
public static void ConstLimitStride200Test()
{
int[] a = Make(1000);
Assert.Equal(ExpectedIncLt(1000, 200), ConstLimitStride200(a));
}

// n is the starting index of the decreasing loop and len the array length;
// every n used here is below len, so the initial a[n] access is in bounds.
[Theory]
[InlineData(1, 1000)]
[InlineData(99, 1000)]
[InlineData(100, 1000)]
[InlineData(500, 1000)]
public static void DecGtStride100Test(int n, int len)
{
int[] a = Make(len);
Assert.Equal(ExpectedDecGt(n, 100), DecGtStride100(a, n));
}
}
8 changes: 8 additions & 0 deletions src/tests/JIT/opt/Cloning/LargeStride.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build the test with compiler optimizations enabled. -->
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- Compile the single source file that shares this project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
Loading