[FIX] Do not recompute SCEVs but pass them to subfunctions

This reverts commit 2879c53e80e05497f408f21ce470d122e9f90f94.
Additionally, it adds SDiv and SRem instructions to the set of values
discovered by the findValues function, even when their operands are
also added so that the SCEVs can be recomputed. In subfunctions we do
not want to recompute SDiv and SRem instructions; we pass them in
instead, because they might have been created through the
IslExprBuilder and can be more complicated than the simple SDiv/SRem
instructions in the code.

llvm-svn: 265873
This commit is contained in:
Johannes Doerfert 2016-04-09 14:30:11 +00:00
parent cc05ee5242
commit b3410db2b7
4 changed files with 70 additions and 23 deletions

View File

@ -572,18 +572,15 @@ public:
if (!Unknown)
return true;
Values.insert(Unknown->getValue());
Instruction *Inst = dyn_cast<Instruction>(Unknown->getValue());
if (!Inst || (Inst->getOpcode() != Instruction::SRem &&
Inst->getOpcode() != Instruction::SDiv)) {
Values.insert(Unknown->getValue());
Inst->getOpcode() != Instruction::SDiv))
return false;
}
auto *Dividend = SE.getSCEV(Inst->getOperand(1));
if (!isa<SCEVConstant>(Dividend)) {
Values.insert(Unknown->getValue());
if (!isa<SCEVConstant>(Dividend))
return false;
}
auto *Divisor = SE.getSCEV(Inst->getOperand(0));
SCEVFindValues FindValues(SE, Values);

View File

@ -225,9 +225,9 @@ void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) {
struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
friend struct SCEVVisitor<ScopExpander, const SCEV *>;
explicit ScopExpander(const Region &R, Function &F, ScalarEvolution &SE,
explicit ScopExpander(const Region &R, ScalarEvolution &SE,
const DataLayout &DL, const char *Name, ValueMapT *VMap)
: Expander(SCEVExpander(SE, DL, Name)), F(F), SE(SE), Name(Name), R(R),
: Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R),
VMap(VMap) {}
Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) {
@ -241,10 +241,6 @@ struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
private:
SCEVExpander Expander;
/// @brief The function in which the code is placed.
Function &F;
ScalarEvolution &SE;
const char *Name;
const Region &R;
@ -268,15 +264,10 @@ private:
Inst->getOpcode() != Instruction::SDiv))
return E;
// If the instruction is outside the SCoP we can just use it without the
// need to recompute it. However, if it is in another function we need to
// recompute it as the definition does not dominate the use.
bool SameFunction = (&F == R.getEntry()->getParent());
if (!R.contains(Inst) && SameFunction)
if (!R.contains(Inst))
return E;
Instruction *StartIP = SameFunction ? R.getEnteringBlock()->getTerminator()
: F.getEntryBlock().getTerminator();
Instruction *StartIP = R.getEnteringBlock()->getTerminator();
const SCEV *LHSScev = visit(SE.getSCEV(Inst->getOperand(0)));
const SCEV *RHSScev = visit(SE.getSCEV(Inst->getOperand(1)));
@ -342,7 +333,7 @@ private:
Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
const char *Name, const SCEV *E, Type *Ty,
Instruction *IP, ValueMapT *VMap) {
ScopExpander Expander(S.getRegion(), *IP->getFunction(), SE, DL, Name, VMap);
ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap);
return Expander.expandCodeFor(E, Ty, IP);
}

View File

@ -0,0 +1,59 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-parallel -polly-codegen -S < %s | FileCheck %s
;
; Check that we do not crash but generate parallel code
;
; CHECK: polly.par.setup
;
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
; Reduced reproducer containing a two-level loop nest.
;   - Outer loop (%for.cond69.preheader .. %for.end76): %sb.374 starts at
;     %sb.3.ph (0 on both incoming edges) and is incremented by 1 until
;     %inc78 equals 32.
;   - Inner loop (%for.body71): %indvars.iv starts at 0 and is incremented
;     by 1 until %indvars.iv.next equals 18.
; Each inner iteration loads an i64 through %rawout1.469 and stores it at
; %tspnt.375 + (%indvars.iv << 5), measured in double elements.
define void @III_hybrid([32 x double]* %tsOut) #0 {
entry:
; Address of element 0 of %tsOut; one of the incoming values of %tspnt.3.ph.
%0 = getelementptr inbounds [32 x double], [32 x double]* %tsOut, i64 0, i64 0
br label %if.end
if.end: ; preds = %entry
; The branch condition is undef, so either successor may be taken.
br i1 undef, label %for.body42, label %for.cond66.preheader
for.cond39.for.cond66.preheader.loopexit67_crit_edge: ; preds = %for.body42
%add.ptr62.lcssa = phi double* [ undef, %for.body42 ]
br label %for.cond66.preheader
; Merge point of the two paths from %if.end; selects the start values for the
; outer loop's source pointer, counter and destination pointer.
for.cond66.preheader: ; preds = %for.cond39.for.cond66.preheader.loopexit67_crit_edge, %if.end
%rawout1.3.ph = phi double* [ %add.ptr62.lcssa, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ undef, %if.end ]
%sb.3.ph = phi i32 [ 0, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ 0, %if.end ]
%tspnt.3.ph = phi double* [ undef, %for.cond39.for.cond66.preheader.loopexit67_crit_edge ], [ %0, %if.end ]
br label %for.cond69.preheader
for.body42: ; preds = %if.end
br label %for.cond39.for.cond66.preheader.loopexit67_crit_edge
; Outer loop header: destination pointer, i32 counter and source pointer.
for.cond69.preheader: ; preds = %for.end76, %for.cond66.preheader
%tspnt.375 = phi double* [ %incdec.ptr79, %for.end76 ], [ %tspnt.3.ph, %for.cond66.preheader ]
%sb.374 = phi i32 [ %inc78, %for.end76 ], [ %sb.3.ph, %for.cond66.preheader ]
%rawout1.373 = phi double* [ undef, %for.end76 ], [ %rawout1.3.ph, %for.cond66.preheader ]
br label %for.body71
; Inner loop body: one i64 load and one i64 store per iteration.
for.body71: ; preds = %for.body71, %for.cond69.preheader
%indvars.iv = phi i64 [ 0, %for.cond69.preheader ], [ %indvars.iv.next, %for.body71 ]
%rawout1.469 = phi double* [ %rawout1.373, %for.cond69.preheader ], [ undef, %for.body71 ]
%1 = bitcast double* %rawout1.469 to i64*
%2 = load i64, i64* %1, align 8
; Store index is %indvars.iv * 32 (shift left by 5) relative to %tspnt.375.
%3 = shl nsw i64 %indvars.iv, 5
%arrayidx73 = getelementptr inbounds double, double* %tspnt.375, i64 %3
%4 = bitcast double* %arrayidx73 to i64*
store i64 %2, i64* %4, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Stay in the inner loop while the incremented induction variable is not 18.
%exitcond = icmp ne i64 %indvars.iv.next, 18
br i1 %exitcond, label %for.body71, label %for.end76
; Outer loop latch: bump the counter and advance the destination by one double.
for.end76: ; preds = %for.body71
%inc78 = add nsw i32 %sb.374, 1
%incdec.ptr79 = getelementptr inbounds double, double* %tspnt.375, i64 1
; Stay in the outer loop while the incremented counter is not 32.
%exitcond95 = icmp ne i32 %inc78, 32
br i1 %exitcond95, label %for.cond69.preheader, label %for.end80
for.end80: ; preds = %for.end76
ret void
}

View File

@ -1,10 +1,10 @@
; RUN: opt %loadPolly -polly-codegen -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we recompute %rem96 in the parallel subfunction.
; Test to verify that we pass %rem96 to the parallel subfunction.
;
; CHECK: %rem96polly = srem i32 %polly.subfunc.arg.n2, 16
; CHECK-NEXT: br label %polly.par.checkNext
; CHECK: %[[R:[0-9]*]] = getelementptr inbounds { i32, i32, i64, float*, float*, i32 }, { i32, i32, i64, float*, float*, i32 }* %polly.par.userContext1, i32 0, i32 5
; CHECK-NEXT: %polly.subfunc.arg.rem96 = load i32, i32* %[[R]]
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"