forked from OSchip/llvm-project
[FIX] Do not recompute SCEVs but pass them to subfunctions
This reverts commit 2879c53e80e05497f408f21ce470d122e9f90f94. Additionally, it adds SDiv and SRem instructions to the set of values discovered by the findValues function, even when their operands are also added so that the SCEVs could be recomputed. In subfunctions we do not want to recompute SDiv and SRem instructions but pass them in instead, as they might have been created by the IslExprBuilder and be more complicated than the simple SDiv/SRem instructions in the code. llvm-svn: 265873
This commit is contained in:
parent
cc05ee5242
commit
b3410db2b7
|
@ -572,18 +572,15 @@ public:
|
|||
if (!Unknown)
|
||||
return true;
|
||||
|
||||
Values.insert(Unknown->getValue());
|
||||
Instruction *Inst = dyn_cast<Instruction>(Unknown->getValue());
|
||||
if (!Inst || (Inst->getOpcode() != Instruction::SRem &&
|
||||
Inst->getOpcode() != Instruction::SDiv)) {
|
||||
Values.insert(Unknown->getValue());
|
||||
Inst->getOpcode() != Instruction::SDiv))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto *Dividend = SE.getSCEV(Inst->getOperand(1));
|
||||
if (!isa<SCEVConstant>(Dividend)) {
|
||||
Values.insert(Unknown->getValue());
|
||||
if (!isa<SCEVConstant>(Dividend))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto *Divisor = SE.getSCEV(Inst->getOperand(0));
|
||||
SCEVFindValues FindValues(SE, Values);
|
||||
|
|
|
@ -225,9 +225,9 @@ void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) {
|
|||
struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
|
||||
friend struct SCEVVisitor<ScopExpander, const SCEV *>;
|
||||
|
||||
explicit ScopExpander(const Region &R, Function &F, ScalarEvolution &SE,
|
||||
explicit ScopExpander(const Region &R, ScalarEvolution &SE,
|
||||
const DataLayout &DL, const char *Name, ValueMapT *VMap)
|
||||
: Expander(SCEVExpander(SE, DL, Name)), F(F), SE(SE), Name(Name), R(R),
|
||||
: Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R),
|
||||
VMap(VMap) {}
|
||||
|
||||
Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) {
|
||||
|
@ -241,10 +241,6 @@ struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
|
|||
|
||||
private:
|
||||
SCEVExpander Expander;
|
||||
|
||||
/// @brief The function in which the code is placed.
|
||||
Function &F;
|
||||
|
||||
ScalarEvolution &SE;
|
||||
const char *Name;
|
||||
const Region &R;
|
||||
|
@ -268,15 +264,10 @@ private:
|
|||
Inst->getOpcode() != Instruction::SDiv))
|
||||
return E;
|
||||
|
||||
// If the instruction is outside the SCoP we can just use it without the
|
||||
// need to recompute it. However, if it is in another function we need to
|
||||
// recompute it as the definition does not dominate the use.
|
||||
bool SameFunction = (&F == R.getEntry()->getParent());
|
||||
if (!R.contains(Inst) && SameFunction)
|
||||
if (!R.contains(Inst))
|
||||
return E;
|
||||
|
||||
Instruction *StartIP = SameFunction ? R.getEnteringBlock()->getTerminator()
|
||||
: F.getEntryBlock().getTerminator();
|
||||
Instruction *StartIP = R.getEnteringBlock()->getTerminator();
|
||||
|
||||
const SCEV *LHSScev = visit(SE.getSCEV(Inst->getOperand(0)));
|
||||
const SCEV *RHSScev = visit(SE.getSCEV(Inst->getOperand(1)));
|
||||
|
@ -342,7 +333,7 @@ private:
|
|||
Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
|
||||
const char *Name, const SCEV *E, Type *Ty,
|
||||
Instruction *IP, ValueMapT *VMap) {
|
||||
ScopExpander Expander(S.getRegion(), *IP->getFunction(), SE, DL, Name, VMap);
|
||||
ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap);
|
||||
return Expander.expandCodeFor(E, Ty, IP);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
; RUN: opt %loadPolly -polly-opt-isl -polly-parallel -polly-codegen -S < %s | FileCheck %s
|
||||
;
|
||||
; Check that we do not crash but generate parallel code
|
||||
;
|
||||
; CHECK: polly.par.setup
|
||||
;
|
||||
; ModuleID = 'bugpoint-reduced-simplified.bc'
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @III_hybrid([32 x double]* %tsOut) #0 {
|
||||
entry:
|
||||
%0 = getelementptr inbounds [32 x double], [32 x double]* %tsOut, i64 0, i64 0
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
br i1 undef, label %for.body42, label %for.cond66.preheader
|
||||
|
||||
for.cond39.for.cond66.preheader.loopexit67_crit_edge: ; preds = %for.body42
|
||||
%add.ptr62.lcssa = phi double* [ undef, %for.body42 ]
|
||||
br label %for.cond66.preheader
|
||||
|
||||
for.cond66.preheader: ; preds = %for.cond39.for.cond66.preheader.loopexit67_crit_edge, %if.end
|
||||
%rawout1.3.ph = phi double* [ %add.ptr62.lcssa, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ undef, %if.end ]
|
||||
%sb.3.ph = phi i32 [ 0, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ 0, %if.end ]
|
||||
%tspnt.3.ph = phi double* [ undef, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ %0, %if.end ]
|
||||
br label %for.cond69.preheader
|
||||
|
||||
for.body42: ; preds = %if.end
|
||||
br label %for.cond39.for.cond66.preheader.loopexit67_crit_edge
|
||||
|
||||
for.cond69.preheader: ; preds = %for.end76, %for.cond66.preheader
|
||||
%tspnt.375 = phi double* [ %incdec.ptr79, %for.end76 ], [ %tspnt.3.ph, %for.cond66.preheader ]
|
||||
%sb.374 = phi i32 [ %inc78, %for.end76 ], [ %sb.3.ph, %for.cond66.preheader ]
|
||||
%rawout1.373 = phi double* [ undef, %for.end76 ], [ %rawout1.3.ph, %for.cond66.preheader ]
|
||||
br label %for.body71
|
||||
|
||||
for.body71: ; preds = %for.body71, %for.cond69.preheader
|
||||
%indvars.iv = phi i64 [ 0, %for.cond69.preheader ], [ %indvars.iv.next, %for.body71 ]
|
||||
%rawout1.469 = phi double* [ %rawout1.373, %for.cond69.preheader ], [ undef, %for.body71 ]
|
||||
%1 = bitcast double* %rawout1.469 to i64*
|
||||
%2 = load i64, i64* %1, align 8
|
||||
%3 = shl nsw i64 %indvars.iv, 5
|
||||
%arrayidx73 = getelementptr inbounds double, double* %tspnt.375, i64 %3
|
||||
%4 = bitcast double* %arrayidx73 to i64*
|
||||
store i64 %2, i64* %4, align 8
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 18
|
||||
br i1 %exitcond, label %for.body71, label %for.end76
|
||||
|
||||
for.end76: ; preds = %for.body71
|
||||
%inc78 = add nsw i32 %sb.374, 1
|
||||
%incdec.ptr79 = getelementptr inbounds double, double* %tspnt.375, i64 1
|
||||
%exitcond95 = icmp ne i32 %inc78, 32
|
||||
br i1 %exitcond95, label %for.cond69.preheader, label %for.end80
|
||||
|
||||
for.end80: ; preds = %for.end76
|
||||
ret void
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-parallel \
|
||||
; RUN: -polly-parallel-force -S < %s | FileCheck %s
|
||||
;
|
||||
; Test to verify that we recompute %rem96 in the parallel subfunction.
|
||||
; Test to verify that we pass %rem96 to the parallel subfunction.
|
||||
;
|
||||
; CHECK: %rem96polly = srem i32 %polly.subfunc.arg.n2, 16
|
||||
; CHECK-NEXT: br label %polly.par.checkNext
|
||||
; CHECK: %[[R:[0-9]*]] = getelementptr inbounds { i32, i32, i64, float*, float*, i32 }, { i32, i32, i64, float*, float*, i32 }* %polly.par.userContext1, i32 0, i32 5
|
||||
; CHECK-NEXT: %polly.subfunc.arg.rem96 = load i32, i32* %[[R]]
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
Loading…
Reference in New Issue