diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3592df3ede3d..a1c642e73444 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5453,7 +5453,7 @@ class HorizontalReduction { } }; - Instruction *ReductionRoot = nullptr; + WeakTrackingVH ReductionRoot; /// The operation data of the reduction operation. OperationData ReductionData; @@ -5738,7 +5738,7 @@ public: unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); Value *VectorizedTree = nullptr; - IRBuilder<> Builder(ReductionRoot); + IRBuilder<> Builder(cast<Instruction>(ReductionRoot)); FastMathFlags Unsafe; Unsafe.setFast(); Builder.setFastMathFlags(Unsafe); @@ -5747,8 +5747,13 @@ public: BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; // The same extra argument may be used several time, so log each attempt // to use it. - for (auto &Pair : ExtraArgs) + for (auto &Pair : ExtraArgs) { + assert(Pair.first && "DebugLoc must be set."); ExternallyUsedValues[Pair.second].push_back(Pair.first); + } + // The reduction root is used as the insertion point for new instructions, + // so set it as externally used to prevent it from being deleted. + ExternallyUsedValues[ReductionRoot]; SmallVector<Value *, 16> IgnoreList; for (auto &V : ReductionOps) IgnoreList.append(V.begin(), V.end()); @@ -5800,6 +5805,7 @@ public: Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues); // Emit a reduction. + Builder.SetInsertPoint(cast<Instruction>(ReductionRoot)); Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); if (VectorizedTree) { @@ -5826,8 +5832,6 @@ public: VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps); } for (auto &Pair : ExternallyUsedValues) { - assert(!Pair.second.empty() && - "At least one DebugLoc must be inserted"); // Add each externally used value to the final reduction. 
for (auto *I : Pair.second) { Builder.SetCurrentDebugLocation(I->getDebugLoc()); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll new file mode 100644 index 000000000000..3ebccb04d076 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s + +define void @Test(i32) { +; CHECK-LABEL: @Test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0 +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]] +; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55 +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]] +; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285 +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]] +; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; CHECK-NEXT: [[VAL_18:%.*]] = add i32 
[[LOCAL_8_43_US]], 1240 +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]] +; CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496 +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555 +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529 +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]] +; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685 +; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]] +; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ] + %local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ] + %val_0 = add i32 %local_4_39.us, 0 + %val_1 = and i32 %local_8_43.us, %val_0 + %val_2 = and i32 %val_1, %0 + %val_3 = and i32 %val_2, %0 + %val_4 = and i32 %val_3, %0 + %val_5 = and i32 %val_4, %0 + %val_6 = add i32 %local_8_43.us, 55 + 
%val_7 = and i32 %val_5, %val_6 + %val_8 = and i32 %val_7, %0 + %val_9 = and i32 %val_8, %0 + %val_10 = and i32 %val_9, %0 + %val_11 = add i32 %local_8_43.us, 285 + %val_12 = and i32 %val_10, %val_11 + %val_13 = and i32 %val_12, %0 + %val_14 = and i32 %val_13, %0 + %val_15 = and i32 %val_14, %0 + %val_16 = and i32 %val_15, %0 + %val_17 = and i32 %val_16, %0 + %val_18 = add i32 %local_8_43.us, 1240 + %val_19 = and i32 %val_17, %val_18 + %val_20 = add i32 %local_8_43.us, 1496 + %val_21 = and i32 %val_19, %val_20 + %val_22 = and i32 %val_21, %0 + %val_23 = and i32 %val_22, %0 + %val_24 = and i32 %val_23, %0 + %val_25 = and i32 %val_24, %0 + %val_26 = and i32 %val_25, %0 + %val_27 = and i32 %val_26, %0 + %val_28 = and i32 %val_27, %0 + %val_29 = and i32 %val_28, %0 + %val_30 = and i32 %val_29, %0 + %val_31 = and i32 %val_30, %0 + %val_32 = and i32 %val_31, %0 + %val_33 = and i32 %val_32, %0 + %val_34 = add i32 %local_8_43.us, 8555 + %val_35 = and i32 %val_33, %val_34 + %val_36 = and i32 %val_35, %0 + %val_37 = and i32 %val_36, %0 + %val_38 = and i32 %val_37, %0 + %val_39 = add i32 %local_8_43.us, 12529 + %val_40 = and i32 %val_38, %val_39 + %val_41 = add i32 %local_8_43.us, 13685 + %val_42 = and i32 %val_40, %val_41 + %val_43 = add i32 %local_8_43.us, 14910 + br label %loop +}