From 39de82ecc9c2e461e1318ed9926286a1eed2be3f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 19 Nov 2019 10:47:07 -0500 Subject: [PATCH] [SLP] fix insertion point for min/max reduction As discussed in D70148 (and caused a revert of the original commit): if we insert at the select, then we can produce invalid IR because the replacement for the compare may have uses before the select. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 19 +++++++++++++++++-- .../Transforms/SLPVectorizer/X86/reduction.ll | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 373d7a7ffd13..fc4f63e4a5b7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6741,8 +6741,23 @@ public: DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues); - // Emit a reduction. - Builder.SetInsertPoint(cast(ReductionRoot)); + auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) { + assert(isa(RdxRootInst) && + "Expected min/max reduction to have select root instruction"); + Value *ScalarCond = cast(RdxRootInst)->getCondition(); + assert(isa(ScalarCond) && + "Expected min/max reduction to have compare condition"); + return cast(ScalarCond); + }; + + // Emit a reduction. For min/max, the root is a select, but the insertion + // point is the compare condition of that select. + Instruction *RdxRootInst = cast(ReductionRoot); + if (ReductionData.isMinMax()) + Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst)); + else + Builder.SetInsertPoint(RdxRootInst); + Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); if (VectorizedTree) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index f6e1d0ad2fea..3a82ee5fa45c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -131,13 +131,13 @@ define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 { ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX22]] to <2 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <2 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i32> [[TMP2]], <2 x i32> [[RDX_SHUF]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RDX_MINMAX_SELECT]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32 ; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[OP_EXTRA]] ; CHECK-NEXT: ret i1 [[CMP23_2]] ;