forked from OSchip/llvm-project
[LoopFusion] Move instructions from FC1.Preheader to FC0.Preheader when
proven safe. Summary: Currently LoopFusion give up when the second loop nest preheader is not empty. For example: for (int i = 0; i < 100; ++i) {} x+=1; for (int i = 0; i < 100; ++i) {} The above example should be safe to fuse. This PR moves instructions in FC1 preheader (e.g. x+=1; ) to FC0 preheader, which then LoopFusion is able to fuse them. Reviewer: kbarton, Meinersbur, jdoerfert, dmgreen, fhahn, hfinkel, bmahjour, etiotto Reviewed By: jdoerfert Subscribers: hiraditya, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D71821
This commit is contained in:
parent
0bec0e7151
commit
da58e68fdf
|
@ -41,6 +41,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
|
|||
DominatorTree &DT, const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI);
|
||||
|
||||
/// Return true if all instructions (except the terminator) in \p BB can be
|
||||
/// safely moved before \p InsertPoint.
|
||||
bool isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
|
||||
DominatorTree &DT, const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI);
|
||||
|
||||
/// Move instructions, in an order-preserving manner, from \p FromBB to the
|
||||
/// beginning of \p ToBB when proven safe.
|
||||
void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
|
||||
|
@ -48,6 +54,12 @@ void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
|
|||
const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI);
|
||||
|
||||
/// Move instructions, in an order-preserving manner, from \p FromBB to the end
|
||||
/// of \p ToBB when proven safe.
|
||||
void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
|
||||
DominatorTree &DT, const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
|
||||
|
|
|
@ -86,7 +86,9 @@ STATISTIC(UnknownTripCount, "Loop has unknown trip count");
|
|||
STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
|
||||
STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
|
||||
STATISTIC(NonAdjacent, "Loops are not adjacent");
|
||||
STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
|
||||
STATISTIC(
|
||||
NonEmptyPreheader,
|
||||
"Loop has a non-empty preheader with instructions that cannot be moved");
|
||||
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
|
||||
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
|
||||
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
|
||||
|
@ -738,19 +740,21 @@ private:
|
|||
continue;
|
||||
}
|
||||
|
||||
// The following three checks look for empty blocks in FC0 and FC1. If
|
||||
// any of these blocks are non-empty, we do not fuse. This is done
|
||||
// because we currently do not have the safety checks to determine if
|
||||
// it is safe to move the blocks past other blocks in the loop. Once
|
||||
// these checks are added, these conditions can be relaxed.
|
||||
if (!isEmptyPreheader(*FC1)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
|
||||
"preheader. Not fusing.\n");
|
||||
if (!isSafeToMoveBefore(*FC1->Preheader,
|
||||
*FC0->Preheader->getTerminator(), DT, PDT,
|
||||
DI)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
|
||||
"instructions in preheader. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
NonEmptyPreheader);
|
||||
continue;
|
||||
}
|
||||
|
||||
// The following two checks look for empty blocks in FC0 and FC1. If
|
||||
// any of these blocks are non-empty, we do not fuse. This is done
|
||||
// because we currently do not have the safety checks to determine if
|
||||
// it is safe to move the blocks past other blocks in the loop. Once
|
||||
// these checks are added, these conditions can be relaxed.
|
||||
if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
|
||||
"block. Not fusing.\n");
|
||||
|
@ -1166,6 +1170,10 @@ private:
|
|||
LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
|
||||
dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
|
||||
|
||||
// Move instructions from the preheader of FC1 to the end of the preheader
|
||||
// of FC0.
|
||||
moveInstructionsToTheEnd(*FC1.Preheader, *FC0.Preheader, DT, PDT, DI);
|
||||
|
||||
// Fusing guarded loops is handled slightly differently than non-guarded
|
||||
// loops and has been broken out into a separate method instead of trying to
|
||||
// intersperse the logic within a single method.
|
||||
|
|
|
@ -380,6 +380,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
|
||||
DominatorTree &DT, const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI) {
|
||||
return llvm::all_of(BB, [&](Instruction &I) {
|
||||
if (BB.getTerminator() == &I)
|
||||
return true;
|
||||
|
||||
return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI);
|
||||
});
|
||||
}
|
||||
|
||||
void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
|
||||
DominatorTree &DT,
|
||||
const PostDominatorTree &PDT,
|
||||
|
@ -394,3 +405,15 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
|
|||
I.moveBefore(MovePos);
|
||||
}
|
||||
}
|
||||
|
||||
void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
|
||||
DominatorTree &DT,
|
||||
const PostDominatorTree &PDT,
|
||||
DependenceInfo &DI) {
|
||||
Instruction *MovePos = ToBB.getTerminator();
|
||||
while (FromBB.size() > 1) {
|
||||
Instruction &I = FromBB.front();
|
||||
if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
|
||||
I.moveBefore(MovePos);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ for.end15: ; preds = %for.cond.cleanup5
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader
|
||||
; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Dependencies prevent fusion
|
||||
define void @negative_dependence(i32* noalias %A) !dbg !51 {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
@ -185,6 +185,36 @@ for.end12: ; preds = %for.inc10
|
|||
; Function Attrs: nounwind readnone speculatable willreturn
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
|
||||
|
||||
; CHECK: remark: diagnostics_missed.c:62:3: [unsafe_preheader]: for.first.preheader and for.second.preheader: Loop has a non-empty preheader with instructions that cannot be moved
|
||||
define void @unsafe_preheader(i32* noalias %A, i32* noalias %B) {
|
||||
for.first.preheader:
|
||||
br label %for.first, !dbg !80
|
||||
|
||||
for.first:
|
||||
%i.02 = phi i64 [ 0, %for.first.preheader ], [ %inc, %for.first ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.02, 1
|
||||
%cmp = icmp slt i64 %inc, 100
|
||||
br i1 %cmp, label %for.first, label %for.second.preheader
|
||||
|
||||
for.second.preheader:
|
||||
call void @bar()
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.01 = phi i64 [ 0, %for.second.preheader ], [ %inc6, %for.second ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.01
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.01, 1
|
||||
%cmp2 = icmp slt i64 %inc6, 100
|
||||
br i1 %cmp2, label %for.second, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
declare void @bar()
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable willreturn }
|
||||
|
||||
!llvm.dbg.cu = !{!2}
|
||||
|
@ -267,3 +297,7 @@ attributes #0 = { nounwind readnone speculatable willreturn }
|
|||
!74 = !DILocation(line: 51, column: 3, scope: !70)
|
||||
!75 = !DILocation(line: 52, column: 15, scope: !70)
|
||||
!76 = !DILocation(line: 57, column: 3, scope: !63)
|
||||
!77 = distinct !DISubprogram(name: "unsafe_preheader", scope: !3, file: !3, line: 60, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
|
||||
!78 = !{}
|
||||
!79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5)
|
||||
!80 = !DILocation(line: 62, column: 3, scope: !79)
|
||||
|
|
|
@ -63,3 +63,59 @@ bb15: ; preds = %bb9
|
|||
bb12: ; preds = %bb15, %bb14
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that `%add` is moved in for.first.preheader, and the two loops for.first
|
||||
; and for.second are fused.
|
||||
|
||||
; CHECK: void @moveinsts_preheader
|
||||
; CHECK-LABEL: for.first.guard:
|
||||
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
|
||||
; CHECK-LABEL: for.first.preheader:
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, 1
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK-LABEL: for.first:
|
||||
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
|
||||
; CHECK-LABEL: for.second.exit:
|
||||
; CHECK-NEXT: br label %for.end
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
define void @moveinsts_preheader(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
|
||||
for.first.guard:
|
||||
%cmp.guard = icmp slt i64 0, %N
|
||||
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
|
||||
%Ai = getelementptr inbounds i32, i32* %A, i64 %i
|
||||
store i32 0, i32* %Ai, align 4
|
||||
%inc.i = add nsw i64 %i, 1
|
||||
%cmp.i = icmp slt i64 %inc.i, %N
|
||||
br i1 %cmp.i, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
br i1 %cmp.guard, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
%add = add nsw i32 %x, 1
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
|
||||
%Bj = getelementptr inbounds i32, i32* %B, i64 %j
|
||||
store i32 0, i32* %Bj, align 4
|
||||
%inc.j = add nsw i64 %j, 1
|
||||
%cmp.j = icmp slt i64 %inc.j, %N
|
||||
br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -306,3 +306,88 @@ for.second.latch:
|
|||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that `%add` is moved in basic block entry, and the two loops for.first
|
||||
; and for.second are fused.
|
||||
|
||||
; CHECK: i32 @moveinsts_preheader
|
||||
; CHECK-LABEL: entry:
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, 1
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK-LABEL: for.first:
|
||||
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
|
||||
; CHECK-LABEL: for.second.exit:
|
||||
; CHECK-NEXT: ret i32 %add
|
||||
|
||||
define i32 @moveinsts_preheader(i32* %A, i32 %x) {
|
||||
entry:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i = phi i64 [ 0, %entry ], [ %inc.i, %for.first ]
|
||||
%Ai = getelementptr inbounds i32, i32* %A, i64 %i
|
||||
store i32 0, i32* %Ai, align 4
|
||||
%inc.i = add nsw i64 %i, 1
|
||||
%cmp.i = icmp slt i64 %inc.i, 100
|
||||
br i1 %cmp.i, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
%add = add nsw i32 %x, 1
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
|
||||
%Aj = getelementptr inbounds i32, i32* %A, i64 %j
|
||||
store i32 2, i32* %Aj, align 4
|
||||
%inc.j = add nsw i64 %j, 1
|
||||
%cmp.j = icmp slt i64 %inc.j, 100
|
||||
br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Test that `%add` cannot be moved to basic block entry, as it uses %i, which
|
||||
; defined after basic block entry. And the two loops for.first and for.second
|
||||
; are not fused.
|
||||
|
||||
; CHECK: i64 @unsafe_preheader
|
||||
; CHECK-LABEL: entry:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK-LABEL: for.first:
|
||||
; CHECK: br i1 %cmp.i, label %for.first, label %for.first.exit
|
||||
; CHECK-LABEL: for.first.exit:
|
||||
; CHECK-NEXT: %add = add nsw i64 %x, %i
|
||||
; CHECK-NEXT: br label %for.second
|
||||
; CHECK-LABEL: for.second:
|
||||
; CHECK: br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
; CHECK-LABEL: for.second.exit:
|
||||
; CHECK-NEXT: ret i64 %add
|
||||
|
||||
define i64 @unsafe_preheader(i32* %A, i64 %x) {
|
||||
entry:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i = phi i64 [ 0, %entry ], [ %inc.i, %for.first ]
|
||||
%Ai = getelementptr inbounds i32, i32* %A, i64 %i
|
||||
store i32 0, i32* %Ai, align 4
|
||||
%inc.i = add nsw i64 %i, 1
|
||||
%cmp.i = icmp slt i64 %inc.i, 100
|
||||
br i1 %cmp.i, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
%add = add nsw i64 %x, %i
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
|
||||
%Aj = getelementptr inbounds i32, i32* %A, i64 %j
|
||||
store i32 2, i32* %Aj, align 4
|
||||
%inc.j = add nsw i64 %j, 1
|
||||
%cmp.j = icmp slt i64 %inc.j, 100
|
||||
br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
ret i64 %add
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue