forked from OSchip/llvm-project
[LoopReroll] Fix rerolling loop with extra instructions
Fixes PR47627 This fix suppresses rerolling a loop which has an unrerollable instruction. Sample IR for the explanation below: ``` define void @foo([2 x i32]* nocapture %a) { entry: br label %loop loop: ; base instruction %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] ; unrerollable instructions %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvar, i64 0 store i32 999, i32* %stptrx, align 4 ; extra simple arithmetic operations, used by root instructions %plus20 = add nuw nsw i64 %indvar, 20 %plus10 = add nuw nsw i64 %indvar, 10 ; root instruction 0 %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 %value0 = load i32, i32* %ldptr0, align 4 %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 store i32 %value0, i32* %stptr0, align 4 ; root instruction 1 %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 %value1 = load i32, i32* %ldptr1, align 4 %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 store i32 %value1, i32* %stptr1, align 4 ; loop-increment and latch %indvar.next = add nuw nsw i64 %indvar, 1 %exitcond = icmp eq i64 %indvar.next, 5 br i1 %exitcond, label %exit, label %loop exit: ret void } ``` In the loop rerolling pass, `%indvar` and `%indvar.next` are appended to the `LoopIncs` vector in the `LoopReroll::DAGRootTracker::findRoots` function. Before this fix, two instructions with `unrerollable instructions` comment above are marked as `IL_All` at the end of the `LoopReroll::DAGRootTracker::collectUsedInstructions` function, as well as instructions with `extra simple arithmetic operations` comment and `loop-increment and latch` comment. It is incorrect because `IL_All` means that the instruction should be executed in all iterations of the rerolled loop but the `store` instruction should not. This fix rejects instructions which may have side effects and don't belong to def-use chains of any root instructions and reductions. See https://bugs.llvm.org/show_bug.cgi?id=47627 for more information.
This commit is contained in:
parent
43cd588c2d
commit
d9a9c992d1
|
@ -1080,6 +1080,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
|
|||
DenseSet<Instruction*> V;
|
||||
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
|
||||
for (auto *I : V) {
|
||||
if (I->mayHaveSideEffects()) {
|
||||
LLVM_DEBUG(dbgs() << "LRR: Aborting - "
|
||||
<< "An instruction which does not belong to any root "
|
||||
<< "sets must not have side effects: " << *I);
|
||||
return false;
|
||||
}
|
||||
Uses[I].set(IL_All);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,268 @@
|
|||
; RUN: opt -S -loop-reroll %s | FileCheck %s
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
define void @rerollable1([2 x i32]* nocapture %a) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
|
||||
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
|
||||
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
|
||||
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4
|
||||
|
||||
; base instruction
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
|
||||
; NO unrerollable instructions
|
||||
|
||||
; extra simple arithmetic operations, used by root instructions
|
||||
%plus20 = add nuw nsw i64 %iv, 20
|
||||
%plus10 = add nuw nsw i64 %iv, 10
|
||||
|
||||
; root instruction 0
|
||||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
|
||||
%value0 = load i32, i32* %ldptr0, align 4
|
||||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
|
||||
store i32 %value0, i32* %stptr0, align 4
|
||||
|
||||
; root instruction 1
|
||||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
|
||||
%value1 = load i32, i32* %ldptr1, align 4
|
||||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
|
||||
store i32 %value1, i32* %stptr1, align 4
|
||||
|
||||
; loop-increment
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
|
||||
; latch
|
||||
%exitcond = icmp eq i64 %iv.next, 5
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @unrerollable1([2 x i32]* nocapture %a) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
|
||||
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
|
||||
|
||||
; base instruction
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
|
||||
; unrerollable instructions using %iv
|
||||
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
|
||||
store i32 999, i32* %stptrx, align 4
|
||||
|
||||
; extra simple arithmetic operations, used by root instructions
|
||||
%plus20 = add nuw nsw i64 %iv, 20
|
||||
%plus10 = add nuw nsw i64 %iv, 10
|
||||
|
||||
; root instruction 0
|
||||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
|
||||
%value0 = load i32, i32* %ldptr0, align 4
|
||||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
|
||||
store i32 %value0, i32* %stptr0, align 4
|
||||
|
||||
; root instruction 1
|
||||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
|
||||
%value1 = load i32, i32* %ldptr1, align 4
|
||||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
|
||||
store i32 %value1, i32* %stptr1, align 4
|
||||
|
||||
; loop-increment
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
|
||||
; latch
|
||||
%exitcond = icmp eq i64 %iv.next, 5
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @unrerollable2([2 x i32]* nocapture %a) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
|
||||
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
|
||||
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
|
||||
|
||||
; base instruction
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
|
||||
; loop-increment
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
|
||||
; unrerollable instructions using %iv.next
|
||||
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
|
||||
store i32 999, i32* %stptrx, align 4
|
||||
|
||||
; extra simple arithmetic operations, used by root instructions
|
||||
%plus20 = add nuw nsw i64 %iv, 20
|
||||
%plus10 = add nuw nsw i64 %iv, 10
|
||||
|
||||
; root instruction 0
|
||||
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
|
||||
%value0 = load i32, i32* %ldptr0, align 4
|
||||
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
|
||||
store i32 %value0, i32* %stptr0, align 4
|
||||
|
||||
; root instruction 1
|
||||
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
|
||||
%value1 = load i32, i32* %ldptr1, align 4
|
||||
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
|
||||
store i32 %value1, i32* %stptr1, align 4
|
||||
|
||||
; latch
|
||||
%exitcond = icmp eq i64 %iv.next, 5
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local void @rerollable2() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
|
||||
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
|
||||
|
||||
; induction variable
|
||||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
|
||||
; scale instruction
|
||||
%iv.mul3 = mul nuw nsw i32 %iv, 3
|
||||
|
||||
; extra simple arithmetic operations, used by root instructions
|
||||
%iv.scaled = add nuw nsw i32 %iv.mul3, 20
|
||||
|
||||
; NO unrerollable instructions
|
||||
|
||||
; root set 1
|
||||
|
||||
; base instruction
|
||||
%iv.scaled.div5 = udiv i32 %iv.scaled, 5
|
||||
tail call void @bar(i32 %iv.scaled.div5)
|
||||
; root instruction 0
|
||||
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
|
||||
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
|
||||
tail call void @bar(i32 %iv.scaled.add1.div5)
|
||||
; root instruction 2
|
||||
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
|
||||
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
|
||||
tail call void @bar(i32 %iv.scaled.add2.div5)
|
||||
|
||||
; root set 2
|
||||
|
||||
; base instruction
|
||||
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
|
||||
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
|
||||
tail call void @bar(i32 %iv.scaled.add4.div5)
|
||||
; root instruction 0
|
||||
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
|
||||
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
|
||||
tail call void @bar(i32 %iv.scaled.add5.div5)
|
||||
; root instruction 2
|
||||
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
|
||||
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
|
||||
tail call void @bar(i32 %iv.scaled.add6.div5)
|
||||
|
||||
; loop-increment
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
|
||||
; latch
|
||||
%cmp = icmp ult i32 %iv.next, 3
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local void @unrerollable3() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
|
||||
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
|
||||
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
|
||||
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)
|
||||
|
||||
; induction variable
|
||||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
|
||||
; scale instruction
|
||||
%iv.mul3 = mul nuw nsw i32 %iv, 3
|
||||
|
||||
; extra simple arithmetic operations, used by root instructions
|
||||
%iv.scaled = add nuw nsw i32 %iv.mul3, 20
|
||||
|
||||
; unrerollable instructions using %iv
|
||||
%iv.mul7 = mul nuw nsw i32 %iv, 7
|
||||
tail call void @bar(i32 %iv.mul7)
|
||||
|
||||
; root set 1
|
||||
|
||||
; base instruction
|
||||
%iv.scaled.div5 = udiv i32 %iv.scaled, 5
|
||||
tail call void @bar(i32 %iv.scaled.div5)
|
||||
; root instruction 0
|
||||
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
|
||||
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
|
||||
tail call void @bar(i32 %iv.scaled.add1.div5)
|
||||
; root instruction 2
|
||||
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
|
||||
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
|
||||
tail call void @bar(i32 %iv.scaled.add2.div5)
|
||||
|
||||
; root set 2
|
||||
|
||||
; base instruction
|
||||
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
|
||||
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
|
||||
tail call void @bar(i32 %iv.scaled.add4.div5)
|
||||
; root instruction 0
|
||||
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
|
||||
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
|
||||
tail call void @bar(i32 %iv.scaled.add5.div5)
|
||||
; root instruction 2
|
||||
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
|
||||
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
|
||||
tail call void @bar(i32 %iv.scaled.add6.div5)
|
||||
|
||||
; loop-increment
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
|
||||
; latch
|
||||
%cmp = icmp ult i32 %iv.next, 3
|
||||
br i1 %cmp, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local void @bar(i32)
|
Loading…
Reference in New Issue