[FIX] Use same alloca for invariant loads and the scalar users

llvm-svn: 252451
This commit is contained in:
Johannes Doerfert 2015-11-09 06:28:45 +00:00
parent 088b7877f2
commit 7a6e292d86
2 changed files with 90 additions and 14 deletions

View File

@ -991,25 +991,35 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
isl_id_free(ParamId);
}
for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
Value *BasePtr = DerivedSAI->getBasePtr();
// As the derived SAI information is quite coarse, any load from the current
// SAI could be the base pointer of the derived SAI, however we should only
// change the base pointer of the derived SAI if we actually preloaded it.
for (const MemoryAccess *MA : MAs) {
if (BasePtr != MA->getBaseAddr())
continue;
BasePtr = Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
DerivedSAI->setBasePtr(BasePtr);
}
}
BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
auto *Alloca = new AllocaInst(AccInstTy, AccInst->getName() + ".preload.s2a");
Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
Builder.CreateStore(PreloadVal, Alloca);
for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
Value *BasePtr = DerivedSAI->getBasePtr();
for (const MemoryAccess *MA : MAs) {
// As the derived SAI information is quite coarse, any load from the
// current SAI could be the base pointer of the derived SAI, however we
// should only change the base pointer of the derived SAI if we actually
// preloaded it.
if (BasePtr == MA->getBaseAddr()) {
BasePtr =
Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
DerivedSAI->setBasePtr(BasePtr);
}
// For scalar derived SAIs we remap the alloca used for the derived value.
if (BasePtr == MA->getAccessInstruction()) {
if (DerivedSAI->isPHI())
PHIOpMap[BasePtr] = Alloca;
else
ScalarMap[BasePtr] = Alloca;
}
}
}
const Region &R = S.getRegion();
for (const MemoryAccess *MA : MAs) {

View File

@ -0,0 +1,66 @@
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
;
; Verify the preloaded %0 is stored and communicated in the same alloca.
;
; CHECK-NOT: alloca
; CHECK: %dec3.s2a = alloca i32
; CHECK-NOT: alloca
; CHECK: %dec3.in.phiops = alloca i32
; CHECK-NOT: alloca
; CHECK: %.preload.s2a = alloca i32
; CHECK-NOT: alloca
;
; CHECK: %ncol.load = load i32, i32* @ncol
; CHECK-NEXT: store i32 %ncol.load, i32* %.preload.s2a
;
; CHECK: polly.stmt.while.body.lr.ph:
; CHECK-NEXT: %.preload.s2a.reload = load i32, i32* %.preload.s2a
; CHECK-NEXT: store i32 %.preload.s2a.reload, i32* %dec3.in.phiops
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@ncol = external global i32, align 4
; Test input: a countdown loop over %data1/%data2 whose trip count comes from
; a single load of @ncol. The index %dec3 starts at (%0 - 1) and is decremented
; each iteration until it reaches 0; for each index, data1[i] is overwritten
; with data2[i] when data1[i] > data2[i] (signed compare).
define void @melt_data(i32* %data1, i32* %data2) {
entry:
br label %entry.split
entry.split: ; preds = %entry
; %0 is the load the test header calls "preloaded"; the CHECK lines expect it
; to appear as %ncol.load and to be stored into %.preload.s2a.
%0 = load i32, i32* @ncol, align 4
; Guard: bypass the loop when the loaded value is zero.
%tobool.2 = icmp eq i32 %0, 0
br i1 %tobool.2, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry.split
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.cond.backedge
; %0 is also the initial incoming value of this PHI. The CHECK lines expect
; polly.stmt.while.body.lr.ph to reload it from %.preload.s2a and store it to
; %dec3.in.phiops.
%dec3.in = phi i32 [ %0, %while.body.lr.ph ], [ %dec3, %while.cond.backedge ]
; Decrement first, then use the decremented value as the array index.
%dec3 = add nsw i32 %dec3.in, -1
; Load data1[%dec3].
%idxprom = sext i32 %dec3 to i64
%arrayidx = getelementptr inbounds i32, i32* %data1, i64 %idxprom
%1 = load i32, i32* %arrayidx, align 4
; Load data2[%dec3].
%idxprom1 = sext i32 %dec3 to i64
%arrayidx2 = getelementptr inbounds i32, i32* %data2, i64 %idxprom1
%2 = load i32, i32* %arrayidx2, align 4
; Only enter %if.then when data1[%dec3] > data2[%dec3] (signed).
%cmp = icmp sgt i32 %1, %2
br i1 %cmp, label %if.then, label %while.cond.backedge
if.then: ; preds = %while.body
; Copy data2[%dec3] into data1[%dec3]; the element is re-loaded and the
; addresses are recomputed rather than reusing %2 / %arrayidx.
%idxprom3 = sext i32 %dec3 to i64
%arrayidx4 = getelementptr inbounds i32, i32* %data2, i64 %idxprom3
%3 = load i32, i32* %arrayidx4, align 4
%idxprom5 = sext i32 %dec3 to i64
%arrayidx6 = getelementptr inbounds i32, i32* %data1, i64 %idxprom5
store i32 %3, i32* %arrayidx6, align 4
br label %while.cond.backedge
while.cond.backedge: ; preds = %if.then, %while.body
; Loop exit test: leave once the index just processed was 0.
%tobool = icmp eq i32 %dec3, 0
br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body
while.cond.while.end_crit_edge: ; preds = %while.cond.backedge
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry.split
ret void
}