forked from OSchip/llvm-project
[FIX] Use same alloca for invariant loads and the scalar users
llvm-svn: 252451
This commit is contained in:
parent
088b7877f2
commit
7a6e292d86
|
@ -991,25 +991,35 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
|
|||
isl_id_free(ParamId);
|
||||
}
|
||||
|
||||
for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
|
||||
Value *BasePtr = DerivedSAI->getBasePtr();
|
||||
|
||||
// As the derived SAI information is quite coarse, any load from the current
|
||||
// SAI could be the base pointer of the derived SAI, however we should only
|
||||
// change the base pointer of the derived SAI if we actually preloaded it.
|
||||
for (const MemoryAccess *MA : MAs) {
|
||||
if (BasePtr != MA->getBaseAddr())
|
||||
continue;
|
||||
BasePtr = Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
|
||||
DerivedSAI->setBasePtr(BasePtr);
|
||||
}
|
||||
}
|
||||
|
||||
BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
|
||||
auto *Alloca = new AllocaInst(AccInstTy, AccInst->getName() + ".preload.s2a");
|
||||
Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
|
||||
Builder.CreateStore(PreloadVal, Alloca);
|
||||
|
||||
for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
|
||||
Value *BasePtr = DerivedSAI->getBasePtr();
|
||||
|
||||
for (const MemoryAccess *MA : MAs) {
|
||||
// As the derived SAI information is quite coarse, any load from the
|
||||
// current SAI could be the base pointer of the derived SAI, however we
|
||||
// should only change the base pointer of the derived SAI if we actually
|
||||
// preloaded it.
|
||||
if (BasePtr == MA->getBaseAddr()) {
|
||||
BasePtr =
|
||||
Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
|
||||
DerivedSAI->setBasePtr(BasePtr);
|
||||
}
|
||||
|
||||
// For scalar derived SAIs we remap the alloca used for the derived value.
|
||||
if (BasePtr == MA->getAccessInstruction()) {
|
||||
if (DerivedSAI->isPHI())
|
||||
PHIOpMap[BasePtr] = Alloca;
|
||||
else
|
||||
ScalarMap[BasePtr] = Alloca;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Region &R = S.getRegion();
|
||||
for (const MemoryAccess *MA : MAs) {
|
||||
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
|
||||
;
|
||||
; Verify the preloaded value of %0 is stored to, and reloaded by its scalar users from, the same alloca.
|
||||
;
|
||||
; CHECK-NOT: alloca
|
||||
; CHECK: %dec3.s2a = alloca i32
|
||||
; CHECK-NOT: alloca
|
||||
; CHECK: %dec3.in.phiops = alloca i32
|
||||
; CHECK-NOT: alloca
|
||||
; CHECK: %.preload.s2a = alloca i32
|
||||
; CHECK-NOT: alloca
|
||||
;
|
||||
; CHECK: %ncol.load = load i32, i32* @ncol
|
||||
; CHECK-NEXT: store i32 %ncol.load, i32* %.preload.s2a
|
||||
;
|
||||
; CHECK: polly.stmt.while.body.lr.ph:
|
||||
; CHECK-NEXT: %.preload.s2a.reload = load i32, i32* %.preload.s2a
|
||||
; CHECK-NEXT: store i32 %.preload.s2a.reload, i32* %dec3.in.phiops
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
@ncol = external global i32, align 4
|
||||
|
||||
; Test input. Starting from the value loaded from @ncol, the loop decrements a
; counter (%dec3) each iteration and, for that index, copies data2[%dec3] into
; data1[%dec3] whenever data1[%dec3] > data2[%dec3] (signed compare). The loop
; ends once the counter reaches zero.
define void @melt_data(i32* %data1, i32* %data2) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
; Load of @ncol performed before the loop; %0 is used both by the guard below
; and as the incoming value of the %dec3.in PHI.
%0 = load i32, i32* @ncol, align 4
|
||||
; Guard: skip the loop entirely when the loaded value is zero.
%tobool.2 = icmp eq i32 %0, 0
|
||||
br i1 %tobool.2, label %while.end, label %while.body.lr.ph
|
||||
|
||||
while.body.lr.ph: ; preds = %entry.split
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.lr.ph, %while.cond.backedge
|
||||
; On loop entry the PHI takes the loaded value %0 directly; on the back edge it
; takes the previously decremented counter.
%dec3.in = phi i32 [ %0, %while.body.lr.ph ], [ %dec3, %while.cond.backedge ]
|
||||
%dec3 = add nsw i32 %dec3.in, -1
|
||||
; Read data1[%dec3].
%idxprom = sext i32 %dec3 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32* %data1, i64 %idxprom
|
||||
%1 = load i32, i32* %arrayidx, align 4
|
||||
; Read data2[%dec3].
%idxprom1 = sext i32 %dec3 to i64
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %data2, i64 %idxprom1
|
||||
%2 = load i32, i32* %arrayidx2, align 4
|
||||
; Only take the store path when data1[%dec3] is strictly greater (signed).
%cmp = icmp sgt i32 %1, %2
|
||||
br i1 %cmp, label %if.then, label %while.cond.backedge
|
||||
|
||||
if.then: ; preds = %while.body
|
||||
; Reload data2[%dec3] and store it into data1[%dec3].
%idxprom3 = sext i32 %dec3 to i64
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %data2, i64 %idxprom3
|
||||
%3 = load i32, i32* %arrayidx4, align 4
|
||||
%idxprom5 = sext i32 %dec3 to i64
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %data1, i64 %idxprom5
|
||||
store i32 %3, i32* %arrayidx6, align 4
|
||||
br label %while.cond.backedge
|
||||
|
||||
while.cond.backedge: ; preds = %if.then, %while.body
|
||||
; Exit the loop once the decremented counter reaches zero.
%tobool = icmp eq i32 %dec3, 0
|
||||
br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body
|
||||
|
||||
while.cond.while.end_crit_edge: ; preds = %while.cond.backedge
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.cond.while.end_crit_edge, %entry.split
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue