[FIX] Use escape logic for invariant loads

Before, we unconditionally forced all users outside the SCoP to use
  the preloaded value. However, if the SCoP is not executed due to the
  runtime checks, we need to use the original value because it might not
  be invariant in the first place.

llvm-svn: 248881
This commit is contained in:
Johannes Doerfert 2015-09-30 09:43:20 +00:00
parent a176421da5
commit ef19ead20e
3 changed files with 133 additions and 8 deletions

View File

@ -890,6 +890,7 @@ void IslNodeBuilder::preloadInvariantLoads() {
return;
const Region &R = S.getRegion();
BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
BasicBlock *PreLoadBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
@ -915,17 +916,27 @@ void IslNodeBuilder::preloadInvariantLoads() {
isl_id_free(ParamId);
}
SmallVector<Instruction *, 4> Users;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
Users.push_back(UI);
for (auto *U : Users)
U->replaceUsesOfWith(AccInst, PreloadVal);
auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
for (auto *DerivedSAI : SAI->getDerivedSAIs())
DerivedSAI->setBasePtr(PreloadVal);
// Use the escape system to get the correct value to users outside
// the SCoP.
BlockGenerator::EscapeUserVectorTy EscapeUsers;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
EscapeUsers.push_back(UI);
if (EscapeUsers.empty())
continue;
auto *Ty = AccInst->getType();
auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a");
Alloca->insertBefore(EntryBB->getFirstInsertionPt());
Builder.CreateStore(PreloadVal, Alloca);
EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers));
}
isl_ast_build_free(Build);

View File

@ -0,0 +1,55 @@
; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s
;
; int f(int *A, int *B) {
; // Possible aliasing between A and B but if not then *B would be
; // invariant. We assume this and hoist *B but need to use a merged
; // version in the return.
; int i = 0;
; int x = 0;
;
; do {
; x = *B;
; A[i] += x;
; } while (i++ < 100);
;
; return x;
; }
;
; CHECK: polly.preload.begin:
; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B
; CHECK: store i32 %polly.access.B.load, i32* %tmp.preload.s2a
;
; CHECK: polly.merge_new_and_old:
; CHECK: %tmp.merge = phi i32 [ %tmp.final_reload, %polly.loop_exit ], [ %tmp, %do.cond ]
; CHECK: br label %do.end
;
; CHECK: do.end:
; CHECK: ret i32 %tmp.merge
;
; CHECK: polly.loop_exit:
; CHECK: %tmp.final_reload = load i32, i32* %tmp.preload.s2a
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; IR for the C function sketched in the comment above: a do-while loop that
; reloads *B on every iteration, adds the loaded value to A[i], and returns
; the loaded value once the loop is done.
define i32 @f(i32* %A, i32* %B) {
entry:
br label %do.body
do.body: ; preds = %do.cond, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ]
; Load of *B. Its result %tmp is used here in the loop and again by the ret in
; do.end, i.e. by an instruction after the loop.
%tmp = load i32, i32* %B, align 4
; A[i] += %tmp
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %tmp1, %tmp
store i32 %add, i32* %arrayidx, align 4
br label %do.cond
do.cond: ; preds = %do.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Keep looping while the incremented counter differs from 101, so the body
; runs for indices 0 through 100.
%exitcond = icmp ne i64 %indvars.iv.next, 101
br i1 %exitcond, label %do.body, label %do.end
do.end: ; preds = %do.cond
; Use of %tmp outside the loop. The FileCheck expectations above require the
; generated code to return a merged value (%tmp.merge) here instead.
ret i32 %tmp
}

View File

@ -0,0 +1,59 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -S < %s | FileCheck %s
; CHECK: polly.merge_new_and_old:
; CHECK-NEXT: merge = phi
%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
%struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float }
%struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
%struct.datapartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment }
%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
%struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
%struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
%struct.BiContextType = type { i16, i8, i64 }
%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
%struct.macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.DecRefPicMarking = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking* }
@img = external global %struct.ImageParameters*, align 8
; Function with three consecutive loops. The i16 value %1 loaded inside the
; second loop (for.body.280) is used again inside the third loop
; (for.body.310), and @img is reloaded before each of those two loops.
; The FileCheck expectations at the top of this test only require that a
; "merge" phi directly follows the polly.merge_new_and_old label.
; NOTE(review): presumably the last two loops are modeled as separate SCoPs,
; as the name %InterScopSext suggests -- confirm against the Polly output.
define void @intrapred_luma() {
entry:
%PredPel = alloca [13 x i16], align 16
br label %for.body
for.body: ; preds = %for.body, %entry
; Empty self-loop with an undef exit condition.
br i1 undef, label %for.body, label %for.body.262
for.body.262: ; preds = %for.body
; First load of the global pointer @img; used as the store base in for.body.280.
%0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
br label %for.body.280
for.body.280: ; preds = %for.body.280, %for.body.262
%indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ]
; Address of PredPel[1 + %indvars.iv66].
%arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1
%arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66
; %1 is stored below and is also used later, in for.body.310.
%1 = load i16, i16* %arrayidx283, align 2
; Store into field 47 of the ImageParameters object at indices [0][2][%indvars.iv66].
%arrayidx289 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66
store i16 %1, i16* %arrayidx289, align 2
%indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1
; Constant-false condition: the back edge is never taken.
br i1 false, label %for.body.280, label %for.end.298
for.end.298: ; preds = %for.body.280
; Second load of @img; used as the store base in for.body.310.
%2 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
br label %for.body.310
for.body.310: ; preds = %for.body.310, %for.end.298
%indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ]
; Use of %1 (defined in for.body.280) as an index into PredPel.
%InterScopSext = sext i16 %1 to i64
%arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 %InterScopSext
%arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv
%3 = load i16, i16* %arrayidx313, align 2
; Store into field 47 of the ImageParameters object at indices [1][%indvars.iv][1].
%arrayidx322 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1
store i16 %3, i16* %arrayidx322, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Constant-false condition: the back edge is never taken.
br i1 false, label %for.body.310, label %for.end.328
for.end.328: ; preds = %for.body.310
ret void
}