forked from OSchip/llvm-project
BlockGenerator: Do not redundantly reload from PHI-allocas in non-affine stmts
Before this change we created an additional reload in the copy of the incoming block of a PHI node to reload the incoming value, even though the necessary value had already been made available by the normally generated scalar loads. In this change, we drop the code that generates this redundant reload and instead just reuse the scalar value already available. Besides making the generated code slightly cleaner, this change also makes sure that scalar loads go through the normal logic, which means they can be remapped (e.g. to array slots) and corresponding code is generated to load from the remapped location. Without this change, the original scalar load at the beginning of the non-affine region would have been remapped, but the redundant scalar load would continue to load from the old PHI slot location. It might be possible to further simplify the code in addOperandToPHI, but this would require not only pulling out getNewValue, but also changing the insertion point update logic. As this did not work when trying it the first time, this change is likely not trivial. To avoid introducing bugs at the last minute, we postpone further simplifications to a subsequent commit. We also document the current behavior a little bit better. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D28892 llvm-svn: 292486
This commit is contained in:
parent
2074e7497b
commit
75dfaa1dbe
|
@ -834,6 +834,77 @@ private:
|
|||
|
||||
/// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
|
||||
///
|
||||
/// PHI nodes, which may have (multiple) edges that enter from outside the
|
||||
/// non-affine subregion and even from outside the scop, are code generated as
|
||||
/// follows:
|
||||
///
|
||||
/// # Original
|
||||
///
|
||||
/// Region: %A -> %exit
|
||||
/// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
|
||||
///
|
||||
/// pre:
|
||||
/// %val = add i64 1, 1
|
||||
///
|
||||
/// A:
|
||||
/// br label %nonaffB
|
||||
///
|
||||
/// nonaffB:
|
||||
/// %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D]
|
||||
/// %cmp = <nonaff>
|
||||
/// br i1 %cmp, label %D, label %nonaffC
|
||||
///
|
||||
/// nonaffC:
|
||||
/// %valC = add i64 1, 1
|
||||
/// br i1 undef, label %D, label %nonaffB
|
||||
///
|
||||
/// D:
|
||||
/// %valD = ...
|
||||
/// %exit_cond = <loopexit>
|
||||
/// br i1 %exit_cond, label %nonaffB, label %exit
|
||||
///
|
||||
/// exit:
|
||||
/// ...
|
||||
///
|
||||
/// - %A and %D enter from outside the non-affine region.
|
||||
/// - %nonaffC enters from within the non-affine region.
|
||||
///
|
||||
/// # New
|
||||
///
|
||||
/// polly.A:
|
||||
/// store i64 %val, i64* %phi.phiops
|
||||
/// br label %polly.nonaffB.entry
|
||||
///
|
||||
/// polly.nonaffB.entry:
|
||||
/// %phi.phiops.reload = load i64, i64* %phi.phiops
|
||||
/// br label %polly.nonaffB
|
||||
///
|
||||
/// polly.nonaffB:
|
||||
/// %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry],
|
||||
/// [%p.valC, %polly.nonaffC]
|
||||
///
|
||||
/// polly.nonaffC:
|
||||
/// %p.valC = add i64 1, 1
|
||||
/// br i1 undef, label %polly.D, label %polly.nonaffB
|
||||
///
|
||||
/// polly.D:
|
||||
/// %p.valD = ...
|
||||
/// store i64 %p.valD, i64* %phi.phiops
|
||||
/// %p.exit_cond = <loopexit>
|
||||
/// br i1 %p.exit_cond, label %polly.nonaffB.entry, label %exit
|
||||
///
|
||||
/// Values that enter the PHI from outside the non-affine region are stored
|
||||
/// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
|
||||
/// reloaded in %polly.nonaffB.entry, a basic block generated before the
|
||||
/// actual non-affine region.
|
||||
///
|
||||
/// When generating the PHI node of the non-affine region in %polly.nonaffB,
|
||||
/// incoming edges from outside the region are combined into a single branch
|
||||
/// from %polly.nonaffB.entry which has as incoming value the value reloaded
|
||||
/// from the %phi.phiops stack slot. Incoming edges from within the region
|
||||
/// refer to the copied instructions (%p.valC) and basic blocks
|
||||
/// (%polly.nonaffC) of the non-affine region.
|
||||
///
|
||||
/// @param Stmt The statement to code generate.
|
||||
/// @param PHI The original PHI we copy.
|
||||
/// @param PHICopy The copy of @p PHI.
|
||||
|
|
|
@ -1415,12 +1415,12 @@ void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
|
|||
return;
|
||||
}
|
||||
|
||||
Value *OpCopy = nullptr;
|
||||
if (StmtR->contains(IncomingBB)) {
|
||||
assert(RegionMaps.count(BBCopy) &&
|
||||
"Incoming PHI block did not have a BBMap");
|
||||
ValueMapT &BBCopyMap = RegionMaps[BBCopy];
|
||||
assert(RegionMaps.count(BBCopy) && "Incoming PHI block did not have a BBMap");
|
||||
ValueMapT &BBCopyMap = RegionMaps[BBCopy];
|
||||
|
||||
Value *OpCopy = nullptr;
|
||||
|
||||
if (StmtR->contains(IncomingBB)) {
|
||||
Value *Op = PHI->getIncomingValueForBlock(IncomingBB);
|
||||
|
||||
// If the current insert block is different from the PHI's incoming block
|
||||
|
@ -1432,13 +1432,15 @@ void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
|
|||
if (IP->getParent() != BBCopy)
|
||||
Builder.SetInsertPoint(&*IP);
|
||||
} else {
|
||||
|
||||
// All edges from outside the non-affine region become a single edge
|
||||
// in the new copy of the non-affine region. Make sure to only add the
|
||||
// corresponding edge the first time we encounter a basic block from
|
||||
// outside the non-affine region.
|
||||
if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
|
||||
return;
|
||||
|
||||
Value *PHIOpAddr = getOrCreatePHIAlloca(PHI);
|
||||
OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
|
||||
BlockMap[IncomingBB]->getTerminator());
|
||||
// Get the reloaded value.
|
||||
OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
|
||||
}
|
||||
|
||||
assert(OpCopy && "Incoming PHI value was not copied properly");
|
||||
|
|
|
@ -12,11 +12,10 @@
|
|||
|
||||
; CHECK: polly.stmt.bb2.entry: ; preds = %polly.start
|
||||
; CHECK-NEXT: %tmp.phiops.reload = load i32, i32* %tmp.phiops
|
||||
; CHECK-NEXT: %tmp.phiops.reload2 = load i32, i32* %tmp.phiops
|
||||
; CHECK-NEXT: br label %polly.stmt.bb2
|
||||
|
||||
; CHECK: polly.stmt.bb2: ; preds = %polly.stmt.bb2, %polly.stmt.bb2.entry
|
||||
; CHECK-NEXT: %polly.tmp = phi i32 [ %tmp.phiops.reload2, %polly.stmt.bb2.entry ], [ %p_tmp4, %polly.stmt.bb2 ]
|
||||
; CHECK-NEXT: %polly.tmp = phi i32 [ %tmp.phiops.reload, %polly.stmt.bb2.entry ], [ %p_tmp4, %polly.stmt.bb2 ]
|
||||
; CHECK-NEXT: %p_tmp3 = or i32 undef, undef
|
||||
; CHECK-NEXT: %p_tmp4 = udiv i32 %p_tmp3, 10
|
||||
; CHECK-NEXT: %p_tmp6 = icmp eq i8 undef, 0
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
; CHECK: %loop_carried.phiops.reload = load float, float* %loop_carried.phiops
|
||||
;
|
||||
; CHECK-LABEL: polly.stmt.subregion_entry:
|
||||
; CHECK: %polly.loop_carried = phi float [ %loop_carried.phiops.reload2, %polly.stmt.subregion_entry.entry ]
|
||||
; CHECK: %polly.loop_carried = phi float [ %loop_carried.phiops.reload, %polly.stmt.subregion_entry.entry ]
|
||||
; CHECK: %p_newval = fadd float %polly.loop_carried, 1.000000e+00
|
||||
;
|
||||
; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit:
|
||||
|
|
Loading…
Reference in New Issue