[Scalarizer] Treat values from unreachable blocks as undef

Summary:
When scalarizing PHI nodes we might try to examine/rewrite
InsertElement nodes in predecessors. If those predecessors
are unreachable from entry, then the IR in those blocks could
have unexpected properties resulting in infinite loops in
Scatterer::operator[].
By simply treating values originating from instructions in
unreachable blocks as undef we do not need to analyse them
further.

This fixes PR41723.

Reviewers: bjope

Reviewed By: bjope

Subscribers: bjope, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70171
This commit is contained in:
Mikael Holmen 2019-11-15 10:29:47 +01:00
parent 1643734741
commit 1587c7e86f
2 changed files with 125 additions and 5 deletions

View File

@ -22,6 +22,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@ -174,8 +175,8 @@ struct VectorLayout {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
: ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT)
: ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) {
}
bool visit(Function &F);
@ -215,6 +216,8 @@ private:
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
DominatorTree *DT;
};
class ScalarizerLegacyPass : public FunctionPass {
@ -226,6 +229,11 @@ public:
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
};
} // end anonymous namespace
@ -233,6 +241,7 @@ public:
char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
@ -304,7 +313,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
return Impl.visit(F);
}
@ -341,6 +351,15 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(BB, BB->begin(), V, &Scattered[V]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
// When scalarizing PHI nodes we might try to examine/rewrite InsertElement
// nodes in predecessors. If those predecessors are unreachable from entry,
// then the IR in those blocks could have unexpected properties resulting in
// infinite loops in Scatterer::operator[]. By simply treating values
// originating from instructions in unreachable blocks as undef we do not
// need to analyse them further.
if (!DT->isReachableFromEntry(VOp->getParent()))
return Scatterer(Point->getParent(), Point->getIterator(),
UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
// instruction.
BasicBlock *BB = VOp->getParent();
@ -857,7 +876,10 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
bool Changed = Impl.visit(F);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
return Changed ? PA : PreservedAnalyses::all();
}

View File

@ -0,0 +1,98 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -scalarizer -S -o - | FileCheck %s
define i16 @f1() {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x i16> [[INSERT]], i16 ptrtoint (i16 ()* @f1 to i16), i32 0
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: br i1 undef, label [[FOR_BODY:%.*]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[PHI_I0:%.*]] = phi i16 [ 1, [[ENTRY:%.*]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_I1:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_I2:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_I3:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_UPTO0:%.*]] = insertelement <4 x i16> undef, i16 [[PHI_I0]], i32 0
; CHECK-NEXT: [[PHI_UPTO1:%.*]] = insertelement <4 x i16> [[PHI_UPTO0]], i16 [[PHI_I1]], i32 1
; CHECK-NEXT: [[PHI_UPTO2:%.*]] = insertelement <4 x i16> [[PHI_UPTO1]], i16 [[PHI_I2]], i32 2
; CHECK-NEXT: [[PHI:%.*]] = insertelement <4 x i16> [[PHI_UPTO2]], i16 [[PHI_I3]], i32 3
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x i16> [[PHI]], i32 0
; CHECK-NEXT: ret i16 [[EXTRACT]]
;
entry:
br label %for.end
for.body:
%insert = insertelement <4 x i16> %insert, i16 ptrtoint (i16 () * @f1 to i16), i32 0
br label %for.cond
for.cond:
br i1 undef, label %for.body, label %for.end
for.end:
; opt used to hang when scalarizing this code. When scattering %insert we
; need to analyze the insertelement in the unreachable-from-entry block
; for.body. Note that the insertelement instruction depends on itself, and
; this kind of IR is not allowed in reachable-from-entry blocks.
%phi = phi <4 x i16> [ <i16 1, i16 1, i16 1, i16 1>, %entry ], [ %insert, %for.cond ]
%extract = extractelement <4 x i16> %phi, i32 0
ret i16 %extract
}
define void @f2() {
; CHECK-LABEL: @f2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END8:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[IF_END8]]
; CHECK: for.body2:
; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[FOR_INC:%.*]]
; CHECK: for.end:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 6, [[FOR_END]] ], [ [[TMP0]], [[FOR_BODY2]] ]
; CHECK-NEXT: br i1 undef, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]]
; CHECK: for.cond1.for.end7_crit_edge:
; CHECK-NEXT: br label [[IF_END8]]
; CHECK: if.end8:
; CHECK-NEXT: [[E_SROA_3_4_I0:%.*]] = phi i64 [ undef, [[FOR_BODY]] ], [ undef, [[FOR_COND1_FOR_END7_CRIT_EDGE]] ], [ undef, [[IF_THEN]] ]
; CHECK-NEXT: [[E_SROA_3_4_I1:%.*]] = phi i64 [ undef, [[FOR_BODY]] ], [ undef, [[FOR_COND1_FOR_END7_CRIT_EDGE]] ], [ undef, [[IF_THEN]] ]
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
br label %for.body
for.body: ; preds = %if.end8, %entry
br i1 undef, label %if.then, label %if.end8
if.then: ; preds = %for.body
br label %if.end8
for.body2: ; preds = %for.inc
br i1 undef, label %for.end, label %for.inc
for.end: ; preds = %for.body2
br label %for.inc
for.inc: ; preds = %for.end, %for.body2
%e.sroa.3.2 = phi <2 x i64> [ <i64 1, i64 1>, %for.end ], [ %e.sroa.3.2, %for.body2 ]
%0 = phi i32 [ 6, %for.end ], [ %0, %for.body2 ]
br i1 undef, label %for.body2, label %for.cond1.for.end7_crit_edge
for.cond1.for.end7_crit_edge: ; preds = %for.inc
br label %if.end8
if.end8: ; preds = %for.cond1.for.end7_crit_edge, %if.then, %for.body
; This used to lead to inserted extractelement instructions between the phis
; in %for.inc.
; %e.sroa.3.2 is defined in a block that is unreachable from entry so we can
; safely replace it with undef in the phi defining e.sroa.3.4.
%e.sroa.3.4 = phi <2 x i64> [ undef, %for.body ], [ %e.sroa.3.2, %for.cond1.for.end7_crit_edge ], [ undef, %if.then ]
br label %for.body
}