From 24b5f8e0d0bd6851e01dbe2de4c71df4b3b0760b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 30 Jun 2022 09:08:33 +0100 Subject: [PATCH] [VPlan] Make sure optimizeInductions removes wide ind from scalar plan. In some cases, there may be widened users of inductions even though the plan includes the scalar VF. In those cases, make sure we still replace the VPWidenIntOrFpInductionRecipe with scalar steps, as otherwise we may try to execute a VPWidenIntOrFpInductionRecipe with a scalar VF. Alternatively the patch could also split the range if needed. This fixes a crash exposed by D123720. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D128755 --- .../Transforms/Vectorize/VPlanTransforms.cpp | 9 ++- .../interleave-and-scalarize-only.ll | 59 +++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ef0de6d6d9e0..cca484e13bf1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -381,10 +381,13 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { SmallVector ToRemove; VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); - if (!IV || - all_of(IV->users(), [IV](VPUser *U) { return !U->usesScalars(IV); })) + if (!IV) + continue; + if (HasOnlyVectorVFs && + none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); })) continue; const InductionDescriptor &ID = IV->getInductionDescriptor(); @@ -400,7 +403,7 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { // the list of users doesn't contain duplicates. 
SetVector<VPUser *> Users(IV->user_begin(), IV->user_end()); for (VPUser *U : Users) { - if (!U->usesScalars(IV)) + if (HasOnlyVectorVFs && !U->usesScalars(IV)) continue; for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { if (U->getOperand(I) != IV) diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 0e9b686affef..da07eadd09b2 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -168,3 +168,62 @@ loop.latch: exit: ret void } + +; Make sure the widened induction gets replaced by scalar-steps for plans +; including the scalar VF, if it is used in first-order recurrences. + +; DBG-LABEL: 'first_order_recurrence_using_induction' +; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { +; DBG-NEXT: Live-in vp<%1> = vector-trip-count +; DBG-EMPTY: +; DBG-NEXT: vector.ph: +; DBG-NEXT: Successor(s): vector loop +; DBG-EMPTY: +; DBG-NEXT: vector loop: { +; DBG-NEXT: vector.body: +; DBG-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<%4> +; DBG-NEXT: vp<%4> = SCALAR-STEPS vp<%2>, ir<0>, ir<1> +; DBG-NEXT: EMIT vp<%5> = first-order splice ir<%for> vp<%4> +; DBG-NEXT: CLONE store vp<%5>, ir<%dst> +; DBG-NEXT: EMIT vp<%7> = VF * UF +(nuw) vp<%2> +; DBG-NEXT: EMIT branch-on-count vp<%7> vp<%1> +; DBG-NEXT: No successors +; DBG-NEXT: } +; DBG-NEXT: Successor(s): middle.block +; DBG-EMPTY: +; DBG-NEXT: middle.block: +; DBG-NEXT: No successors +; DBG-NEXT: } + +define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) { +; CHECK-LABEL: @first_order_recurrence_using_induction( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDUCTION1:%.*]], %vector.body ] +; CHECK-NEXT: 
[[TMP3:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[TMP3]], 0 +; CHECK-NEXT: [[INDUCTION1]] = add i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[VECTOR_RECUR]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: store i32 [[INDUCTION]], ptr [[DST]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec +; CHECK-NEXT: br i1 [[TMP4]], label %middle.block, label %vector.body +; CHECK: middle.block: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ],[ %iv.next, %loop ] + %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] + %iv.trunc = trunc i64 %iv to i32 + store i32 %for, ptr %dst + %iv.next = add nuw nsw i64 %iv, 1 + %iv.next.trunc = trunc i64 %iv.next to i32 + %ec = icmp slt i32 %iv.next.trunc, %n + br i1 %ec, label %loop, label %exit + +exit: + ret void +}