forked from OSchip/llvm-project
[OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
Summary: Change the strategy for computing loop index variables after collapsing a loop nest via the collapse clause: the expensive remainder operation is replaced by a multiplication and a subtraction (Ik = Acc / Prod; Acc -= Ik * Prod).
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: guansong, arphaman, cfe-commits
Differential Revision: https://reviews.llvm.org/D56413
llvm-svn: 350759
This commit is contained in:
parent
a3afcf2445
commit
67796064c7
|
@ -5579,31 +5579,59 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
|
|||
Built.Updates.resize(NestedLoopCount);
|
||||
Built.Finals.resize(NestedLoopCount);
|
||||
{
|
||||
ExprResult Div;
|
||||
// Go from inner nested loop to outer.
|
||||
for (int Cnt = NestedLoopCount - 1; Cnt >= 0; --Cnt) {
|
||||
// We implement the following algorithm for obtaining the
|
||||
// original loop iteration variable values based on the
|
||||
// value of the collapsed loop iteration variable IV.
|
||||
//
|
||||
// Let n+1 be the number of collapsed loops in the nest.
|
||||
// Iteration variables (I0, I1, ... In)
|
||||
// Iteration counts (N0, N1, ... Nn)
|
||||
//
|
||||
// Acc = IV;
|
||||
//
|
||||
// To compute Ik for loop k, 0 <= k <= n, generate:
|
||||
// Prod = N(k+1) * N(k+2) * ... * Nn;
|
||||
// Ik = Acc / Prod;
|
||||
// Acc -= Ik * Prod;
|
||||
//
|
||||
ExprResult Acc = IV;
|
||||
for (unsigned int Cnt = 0; Cnt < NestedLoopCount; ++Cnt) {
|
||||
LoopIterationSpace &IS = IterSpaces[Cnt];
|
||||
SourceLocation UpdLoc = IS.IncSrcRange.getBegin();
|
||||
// Build: Iter = (IV / Div) % IS.NumIters
|
||||
// where Div is product of previous iterations' IS.NumIters.
|
||||
ExprResult Iter;
|
||||
if (Div.isUsable()) {
|
||||
Iter =
|
||||
SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Div, IV.get(), Div.get());
|
||||
} else {
|
||||
Iter = IV;
|
||||
assert((Cnt == (int)NestedLoopCount - 1) &&
|
||||
"unusable div expected on first iteration only");
|
||||
}
|
||||
|
||||
if (Cnt != 0 && Iter.isUsable())
|
||||
Iter = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Rem, Iter.get(),
|
||||
IS.NumIterations);
|
||||
// Compute prod
|
||||
ExprResult Prod =
|
||||
SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get();
|
||||
for (unsigned int K = Cnt+1; K < NestedLoopCount; ++K)
|
||||
Prod = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Mul, Prod.get(),
|
||||
IterSpaces[K].NumIterations);
|
||||
|
||||
// Iter = Acc / Prod
|
||||
// Only divide if there is at least one more inner loop, to avoid
|
||||
// a division by 1.
|
||||
if (Cnt + 1 < NestedLoopCount)
|
||||
Iter = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Div,
|
||||
Acc.get(), Prod.get());
|
||||
else
|
||||
Iter = Acc;
|
||||
if (!Iter.isUsable()) {
|
||||
HasErrors = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Update Acc:
|
||||
// Acc -= Iter * Prod
|
||||
// Check if there is at least one more inner loop to avoid
|
||||
// multiplication by 1.
|
||||
if (Cnt + 1 < NestedLoopCount)
|
||||
Prod = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Mul,
|
||||
Iter.get(), Prod.get());
|
||||
else
|
||||
Prod = Iter;
|
||||
Acc = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Sub,
|
||||
Acc.get(), Prod.get());
|
||||
|
||||
// Build update: IS.CounterVar(Private) = IS.Start + Iter * IS.Step
|
||||
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IS.CounterVar)->getDecl());
|
||||
DeclRefExpr *CounterVar = buildDeclRefExpr(
|
||||
|
@ -5632,22 +5660,6 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
|
|||
break;
|
||||
}
|
||||
|
||||
// Build Div for the next iteration: Div <- Div * IS.NumIters
|
||||
if (Cnt != 0) {
|
||||
if (Div.isUnset())
|
||||
Div = IS.NumIterations;
|
||||
else
|
||||
Div = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Mul, Div.get(),
|
||||
IS.NumIterations);
|
||||
|
||||
// Add parentheses (for debugging purposes only).
|
||||
if (Div.isUsable())
|
||||
Div = tryBuildCapture(SemaRef, Div.get(), Captures);
|
||||
if (!Div.isUsable()) {
|
||||
HasErrors = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!Update.isUsable() || !Final.isUsable()) {
|
||||
HasErrors = true;
|
||||
break;
|
||||
|
|
|
@ -38,7 +38,6 @@ void loop_with_counter_collapse() {
|
|||
// LIFETIME: call void @llvm.lifetime.end
|
||||
// LIFETIME: call void @llvm.lifetime.end
|
||||
// LIFETIME: call void @llvm.lifetime.end
|
||||
// LIFETIME: call void @llvm.lifetime.end
|
||||
#pragma omp for collapse(2)
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = i; j < 4; j++) {
|
||||
|
|
|
@ -408,7 +408,10 @@ int templ1(T a, T *z) {
|
|||
// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
|
||||
// CHECK-NEXT: store i32 [[I_2]], i32*
|
||||
// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
||||
// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
|
||||
// CHECK-NEXT: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_2:%.+]] = sdiv i64 [[IV2_1]], 4
|
||||
// CHECK-NEXT: [[MUL_2:%.+]] = mul nsw i64 [[DIV_2]], 4
|
||||
// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_2]]
|
||||
// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
|
||||
// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
|
||||
// CHECK-NEXT: store i64 [[J_2_ADD0]], i64*
|
||||
|
@ -556,22 +559,70 @@ void collapsed(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
|
||||
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
|
||||
|
||||
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
|
||||
// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60
|
||||
// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60
|
||||
// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]]
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
|
||||
// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
|
||||
|
||||
// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
|
||||
// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
|
||||
// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
|
||||
// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
|
||||
// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60
|
||||
// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60
|
||||
// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]]
|
||||
|
||||
// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
|
||||
// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
|
||||
// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60
|
||||
// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60
|
||||
// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[ADD_3]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 5
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1
|
||||
// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]]
|
||||
// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]]
|
||||
|
||||
// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60
|
||||
// CHECK-NEXT: [[ADD_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]]
|
||||
|
||||
// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60
|
||||
// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60
|
||||
// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]]
|
||||
// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20
|
||||
// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20
|
||||
// CHECK-NEXT: [[ADD_9:%.+]] = sub i32 [[ADD_7]], [[MUL_8]]
|
||||
|
||||
// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60
|
||||
// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60
|
||||
// CHECK-NEXT: [[ADD_10:%.+]] = sub i32 [[IV1_7]], [[MUL_9]]
|
||||
|
||||
// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20
|
||||
// CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[ADD_10]], [[MUL_5]]
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5
|
||||
// CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[ADD_9]], [[MUL_6]]
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]]
|
||||
// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
|
||||
// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
|
||||
// ... loop body ...
|
||||
|
|
|
@ -513,22 +513,70 @@ void collapsed(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
|
||||
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
|
||||
|
||||
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
|
||||
// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60
|
||||
// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60
|
||||
// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]]
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
|
||||
// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
|
||||
|
||||
// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
|
||||
// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
|
||||
// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
|
||||
// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
|
||||
// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60
|
||||
// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60
|
||||
// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]]
|
||||
|
||||
// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
|
||||
// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
|
||||
// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60
|
||||
// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60
|
||||
// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[ADD_3]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 5
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1
|
||||
// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]]
|
||||
// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]]
|
||||
|
||||
// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60
|
||||
// CHECK-NEXT: [[ADD_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]]
|
||||
|
||||
// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60
|
||||
// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60
|
||||
// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]]
|
||||
// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20
|
||||
// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20
|
||||
// CHECK-NEXT: [[ADD_9:%.+]] = sub i32 [[ADD_7]], [[MUL_8]]
|
||||
|
||||
// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60
|
||||
// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60
|
||||
// CHECK-NEXT: [[ADD_10:%.+]] = sub i32 [[IV1_7]], [[MUL_9]]
|
||||
|
||||
// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20
|
||||
// CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[ADD_10]], [[MUL_5]]
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5
|
||||
// CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[ADD_9]], [[MUL_6]]
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]]
|
||||
// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
|
||||
// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
|
||||
// ... loop body ...
|
||||
|
@ -655,7 +703,10 @@ void widened(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
|
||||
// CHECK-NEXT: store i32 [[I_2]], i32*
|
||||
// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
||||
// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
|
||||
// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4
|
||||
// CHECK-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4
|
||||
// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]]
|
||||
// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
|
||||
// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
|
||||
// CHECK-NEXT: store i64 [[J_2_ADD0]], i64*
|
||||
|
|
|
@ -278,8 +278,11 @@ int templ1(T a, T *z) {
|
|||
// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
|
||||
// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.access.group
|
||||
// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
|
||||
// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
|
||||
// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
|
||||
// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
|
||||
// CHECK-NEXT: [[J_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4
|
||||
// CHECK-NEXT: [[J_1_MUL1:%.+]] = mul nsw i64 [[J_1_DIV1]], 4
|
||||
// CHECK-NEXT: [[J_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[J_1_MUL1]]
|
||||
// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1_SUB0]], 2
|
||||
// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
|
||||
// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.access.group
|
||||
// simd.for.inc:
|
||||
|
@ -393,22 +396,70 @@ void collapsed(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
|
||||
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
|
||||
|
||||
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
|
||||
// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60
|
||||
// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60
|
||||
// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]]
|
||||
// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20
|
||||
// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
|
||||
// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
|
||||
|
||||
// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
|
||||
// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
|
||||
// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
|
||||
// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
|
||||
// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
|
||||
// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
|
||||
// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60
|
||||
// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60
|
||||
// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]]
|
||||
|
||||
// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60
|
||||
// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60
|
||||
// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20
|
||||
// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1
|
||||
// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]]
|
||||
// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]]
|
||||
|
||||
// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]]
|
||||
|
||||
// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60
|
||||
// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60
|
||||
// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]]
|
||||
// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20
|
||||
// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20
|
||||
// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]]
|
||||
|
||||
// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60
|
||||
// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60
|
||||
// CHECK-NEXT: [[SUB_12:%.+]] = sub i32 [[IV1_7]], [[MUL_9]]
|
||||
|
||||
// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60
|
||||
// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60
|
||||
// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]]
|
||||
// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20
|
||||
// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20
|
||||
// CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[SUB_12]], [[MUL_5]]
|
||||
// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5
|
||||
// CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[SUB_11]], [[MUL_6]]
|
||||
// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1
|
||||
// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]]
|
||||
// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
|
||||
// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
|
||||
// ... loop body ...
|
||||
|
|
Loading…
Reference in New Issue