forked from OSchip/llvm-project
LoopIdiom: Replace custom dependence analysis with LoopDependenceAnalysis.
Requires a lot less code and complexity on loop-idiom's side and the more precise analysis can catch more cases, like the one I included as a test case. This also fixes the edge-case miscompilation from PR9481. I'm not entirely sure that all cases are handled that the old checks handled but LDA will certainly become smarter in the future. llvm-svn: 166390
This commit is contained in:
parent
2d1b1ad831
commit
3ae8bc68af
|
@ -48,6 +48,7 @@
|
|||
#include "llvm/Module.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/LoopDependenceAnalysis.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
|
@ -106,6 +107,8 @@ namespace {
|
|||
AU.addPreserved<AliasAnalysis>();
|
||||
AU.addRequired<ScalarEvolution>();
|
||||
AU.addPreserved<ScalarEvolution>();
|
||||
AU.addRequired<LoopDependenceAnalysis>();
|
||||
AU.addPreserved<LoopDependenceAnalysis>();
|
||||
AU.addPreserved<DominatorTree>();
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addRequired<TargetLibraryInfo>();
|
||||
|
@ -122,6 +125,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
|||
INITIALIZE_PASS_DEPENDENCY(LCSSA)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopDependenceAnalysis)
|
||||
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
|
||||
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
|
||||
false, false)
|
||||
|
@ -163,15 +167,6 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
|
|||
} while (!NowDeadInsts.empty());
|
||||
}
|
||||
|
||||
/// deleteIfDeadInstruction - If the specified value is a dead instruction,
|
||||
/// delete it and any recursively used instructions.
|
||||
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
|
||||
const TargetLibraryInfo *TLI) {
|
||||
if (Instruction *I = dyn_cast<Instruction>(V))
|
||||
if (isInstructionTriviallyDead(I, TLI))
|
||||
deleteDeadInstruction(I, SE, TLI);
|
||||
}
|
||||
|
||||
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
CurLoop = L;
|
||||
|
||||
|
@ -368,35 +363,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
|
|||
MSI, Ev, BECount);
|
||||
}
|
||||
|
||||
|
||||
/// mayLoopAccessLocation - Return true if the specified loop might access the
|
||||
/// specified pointer location, which is a loop-strided access. The 'Access'
|
||||
/// argument specifies what the verboten forms of access are (read or write).
|
||||
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
|
||||
Loop *L, const SCEV *BECount,
|
||||
unsigned StoreSize, AliasAnalysis &AA,
|
||||
Instruction *IgnoredStore) {
|
||||
// Get the location that may be stored across the loop. Since the access is
|
||||
// strided positively through memory, we say that the modified location starts
|
||||
// at the pointer and has infinite size.
|
||||
uint64_t AccessSize = AliasAnalysis::UnknownSize;
|
||||
|
||||
// If the loop iterates a fixed number of times, we can refine the access size
|
||||
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
|
||||
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
|
||||
AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
|
||||
|
||||
// TODO: For this to be really effective, we have to dive into the pointer
|
||||
// operand in the store. Store to &A[i] of 100 will always return may alias
|
||||
// with store of &A[100], we need to StoreLoc to be "A" with size of 100,
|
||||
// which will then no-alias a store to &A[100].
|
||||
AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
|
||||
|
||||
/// hasDependence - Uses the LoopDependenceAnalysis to determine whether 'Inst'
|
||||
/// depends on any other value in the Loop 'L'.
|
||||
static bool hasDependence(Instruction *Inst, Loop *L,
|
||||
LoopDependenceAnalysis &LDA) {
|
||||
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
|
||||
++BI)
|
||||
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
|
||||
if (&*I != IgnoredStore &&
|
||||
(AA.getModRefInfo(I, StoreLoc) & Access))
|
||||
if (&*I != Inst && I->mayReadOrWriteMemory() &&
|
||||
(I->mayWriteToMemory() || Inst->mayWriteToMemory()) &&
|
||||
LDA.depends(Inst, I))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -474,6 +450,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Make sure the store has no dependencies (i.e. other loads and stores) in
|
||||
// the loop.
|
||||
if (hasDependence(TheStore, CurLoop, getAnalysis<LoopDependenceAnalysis>()))
|
||||
return false;
|
||||
|
||||
// The trip count of the loop and the base pointer of the addrec SCEV is
|
||||
// guaranteed to be loop invariant, which means that it should dominate the
|
||||
// header. This allows us to insert code for it in the preheader.
|
||||
|
@ -482,25 +463,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||
SCEVExpander Expander(*SE, "loop-idiom");
|
||||
|
||||
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
|
||||
// this into a memset in the loop preheader now if we want. However, this
|
||||
// would be unsafe to do if there is anything else in the loop that may read
|
||||
// or write to the aliased location. Check for any overlap by generating the
|
||||
// base pointer and checking the region.
|
||||
// this into a memset in the loop preheader now if we want.
|
||||
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
|
||||
Value *BasePtr =
|
||||
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
|
||||
Preheader->getTerminator());
|
||||
|
||||
|
||||
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
|
||||
CurLoop, BECount,
|
||||
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
|
||||
Expander.clear();
|
||||
// If we generated new code for the base pointer, clean up.
|
||||
deleteIfDeadInstruction(BasePtr, *SE, TLI);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Okay, everything looks good, insert the memset.
|
||||
|
||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
||||
|
@ -563,6 +532,14 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
|||
|
||||
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
|
||||
|
||||
// Make sure the load and the store have no dependencies (i.e. other loads and
|
||||
// stores) in the loop.
|
||||
// FIXME: If we want to form a memmove SI and LI can be dependent but the
|
||||
// distance must be positive. LDA doesn't provide that info currently.
|
||||
LoopDependenceAnalysis &LDA = getAnalysis<LoopDependenceAnalysis>();
|
||||
if (hasDependence(SI, CurLoop, LDA) || hasDependence(LI, CurLoop, LDA))
|
||||
return false;
|
||||
|
||||
// The trip count of the loop and the base pointer of the addrec SCEV is
|
||||
// guaranteed to be loop invariant, which means that it should dominate the
|
||||
// header. This allows us to insert code for it in the preheader.
|
||||
|
@ -571,41 +548,16 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
|||
SCEVExpander Expander(*SE, "loop-idiom");
|
||||
|
||||
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
|
||||
// this into a memcpy in the loop preheader now if we want. However, this
|
||||
// would be unsafe to do if there is anything else in the loop that may read
|
||||
// or write the memory region we're storing to. This includes the load that
|
||||
// feeds the stores. Check for an alias by generating the base address and
|
||||
// checking everything.
|
||||
// this into a memcpy in the loop preheader now if we want.
|
||||
Value *StoreBasePtr =
|
||||
Expander.expandCodeFor(StoreEv->getStart(),
|
||||
Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
|
||||
Preheader->getTerminator());
|
||||
|
||||
if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
|
||||
CurLoop, BECount, StoreSize,
|
||||
getAnalysis<AliasAnalysis>(), SI)) {
|
||||
Expander.clear();
|
||||
// If we generated new code for the base pointer, clean up.
|
||||
deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
|
||||
return false;
|
||||
}
|
||||
|
||||
// For a memcpy, we have to make sure that the input array is not being
|
||||
// mutated by the loop.
|
||||
Value *LoadBasePtr =
|
||||
Expander.expandCodeFor(LoadEv->getStart(),
|
||||
Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
|
||||
Preheader->getTerminator());
|
||||
|
||||
if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
|
||||
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
|
||||
Expander.clear();
|
||||
// If we generated new code for the base pointer, clean up.
|
||||
deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
|
||||
deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Okay, everything is safe, we can transform this!
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }
|
||||
%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }
|
||||
|
||||
define void @test1(%struct.ham* nocapture %arg) nounwind {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb11, %bb
|
||||
%tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb2, %bb1
|
||||
%tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]
|
||||
%tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3
|
||||
store i32 0, i32* %tmp4, align 4
|
||||
%tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3
|
||||
store i32 0, i32* %tmp5, align 4
|
||||
%tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3
|
||||
store i32 0, i32* %tmp6, align 4
|
||||
%tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3
|
||||
store i32 0, i32* %tmp7, align 4
|
||||
%tmp8 = add i64 %tmp3, 1
|
||||
%tmp9 = trunc i64 %tmp8 to i32
|
||||
%tmp10 = icmp eq i32 %tmp9, 8
|
||||
br i1 %tmp10, label %bb11, label %bb2
|
||||
|
||||
bb11: ; preds = %bb2
|
||||
%tmp12 = add i64 %tmp, 1
|
||||
%tmp13 = trunc i64 %tmp12 to i32
|
||||
%tmp14 = icmp eq i32 %tmp13, 16
|
||||
br i1 %tmp14, label %bb15, label %bb1
|
||||
|
||||
bb15: ; preds = %bb11
|
||||
ret void
|
||||
|
||||
; CHECK: @test1
|
||||
; CHECK: bb1:
|
||||
; CHECK-NOT: store
|
||||
; CHECK: call void @llvm.memset.p0i8.i64
|
||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64
|
||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64
|
||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
; PR9481
|
||||
define i32 @test1() nounwind uwtable ssp {
|
||||
entry:
|
||||
%a = alloca [10 x i8], align 1
|
||||
br label %for.body
|
||||
|
||||
for.cond1.preheader: ; preds = %for.body
|
||||
%arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0
|
||||
%.pre = load i8* %arrayidx5.phi.trans.insert, align 1
|
||||
br label %for.body3
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]
|
||||
call void (...)* @bar() nounwind
|
||||
%arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29
|
||||
store i8 23, i8* %arrayidx, align 1
|
||||
%indvars.iv.next30 = add i64 %indvars.iv29, 1
|
||||
%lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
|
||||
%exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000
|
||||
br i1 %exitcond32, label %for.cond1.preheader, label %for.body
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.cond1.preheader
|
||||
%0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]
|
||||
%indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
call void (...)* @bar() nounwind
|
||||
%arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv
|
||||
%1 = load i8* %arrayidx7, align 1
|
||||
%add = add i8 %1, %0
|
||||
store i8 %add, i8* %arrayidx7, align 1
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1000000
|
||||
br i1 %exitcond, label %for.end12, label %for.body3
|
||||
|
||||
for.end12: ; preds = %for.body3
|
||||
%arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2
|
||||
%2 = load i8* %arrayidx13, align 1
|
||||
%conv14 = sext i8 %2 to i32
|
||||
%arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6
|
||||
%3 = load i8* %arrayidx15, align 1
|
||||
%conv16 = sext i8 %3 to i32
|
||||
%add17 = add nsw i32 %conv16, %conv14
|
||||
ret i32 %add17
|
||||
|
||||
; CHECK: @test1
|
||||
; CHECK-NOT: @llvm.memset
|
||||
}
|
||||
|
||||
declare void @bar(...)
|
Loading…
Reference in New Issue