From e70d5dcf3eb9f2cfe3c0e6bd9d516e733a54fc81 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 17 Mar 2017 20:52:21 +0000
Subject: [PATCH] AMDGPU: Fix handling of constant phi input loop conditions

If the loop condition was an i1 phi with a constantexpr input, this
would add a loop intrinsic fed by a phi dependent on a call to
if.break in the same block. Insert the call in the loop header.

llvm-svn: 298121
---
 .../Target/AMDGPU/SIAnnotateControlFlow.cpp |  13 +-
 llvm/test/CodeGen/AMDGPU/loop_break.ll      | 258 ++++++++++++++++++
 2 files changed, 266 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index db2e17abc028..c9834bf53648 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -220,7 +220,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
   if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {

     BasicBlock *Parent = Phi->getParent();
-    PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+    PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
     Value *Ret = NewPhi;

     // Handle all non-constant incoming values first
@@ -293,10 +293,13 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
     return CallInst::Create(IfBreak, Args, "", Insert);
   }

-  // Insert IfBreak before TERM for constant COND.
-  if (isa<ConstantInt>(Cond)) {
+  // Insert IfBreak in the loop header TERM for constant COND other than true.
+  if (isa<Constant>(Cond)) {
+    Instruction *Insert = Cond == BoolTrue ?
+      Term : L->getHeader()->getTerminator();
+
     Value *Args[] = { Cond, Broken };
-    return CallInst::Create(IfBreak, Args, "", Term);
+    return CallInst::Create(IfBreak, Args, "", Insert);
   }

   llvm_unreachable("Unhandled loop condition!");
@@ -313,7 +316,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
     return;

   BasicBlock *Target = Term->getSuccessor(1);
-  PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+  PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());

   Value *Cond = Term->getCondition();
   Term->setCondition(BoolTrue);
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 82564b8bb28d..97212f5dc12f 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -64,6 +64,264 @@ bb9:
   ret void
 }

+; OPT-LABEL: @undef_phi_cond_break_loop(
+; OPT: bb1:
+; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
+; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
+; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
+; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow
+
+; OPT: bb4:
+; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
+; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
+; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
+; OPT-NEXT: br label %Flow
+
+; OPT: Flow:
+; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
+; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
+; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
+; OPT-NEXT: br i1 %2, label %bb9, label %bb1
+
+; OPT: bb9:                                            ; preds = %Flow
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; OPT-NEXT: store volatile i32 7
+; OPT-NEXT: ret void
+define void @undef_phi_cond_break_loop(i32 %arg) #0 {
+bb:
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp = sub i32 %id, %arg
+  br label %bb1
+
+bb1: ; preds = %Flow, %bb + %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv.next = add i32 %lsr.iv, 1 + %cmp0 = icmp slt i32 %lsr.iv.next, 0 + br i1 %cmp0, label %bb4, label %Flow + +bb4: ; preds = %bb1 + %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %cmp1 = icmp sge i32 %tmp, %load + br label %Flow + +Flow: ; preds = %bb4, %bb1 + %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ] + br i1 %tmp3, label %bb9, label %bb1 + +bb9: ; preds = %Flow + store volatile i32 7, i32 addrspace(3)* undef + ret void +} + +; FIXME: ConstantExpr compare of address to null folds away +@lds = addrspace(3) global i32 undef + +; OPT-LABEL: @constexpr_phi_cond_break_loop( +; OPT: bb1: +; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ] +; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] +; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken) +; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow + +; OPT: bb4: +; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load +; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken) +; OPT-NEXT: br label %Flow + +; OPT: Flow: +; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ] +; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] +; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi) +; OPT-NEXT: br i1 %2, label %bb9, label %bb1 + +; OPT: bb9: ; preds = %Flow +; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi) +; OPT-NEXT: store volatile i32 7 +; OPT-NEXT: ret void +define void @constexpr_phi_cond_break_loop(i32 %arg) #0 { +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %tmp = sub i32 %id, %arg + br label %bb1 + +bb1: ; preds = %Flow, %bb + %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv.next = add i32 %lsr.iv, 1 + %cmp0 = icmp slt i32 %lsr.iv.next, 0 + br i1 %cmp0, label %bb4, label %Flow + +bb4: ; preds = %bb1 + %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %cmp1 = icmp sge i32 %tmp, %load + br label %Flow + +Flow: ; preds = %bb4, %bb1 + %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ] + br i1 %tmp3, label %bb9, label %bb1 + +bb9: ; preds = %Flow + store volatile i32 7, i32 addrspace(3)* undef + ret void +} + +; OPT-LABEL: @true_phi_cond_break_loop( +; OPT: bb1: +; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ] +; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] +; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken) +; OPT: br i1 %cmp0, label %bb4, label %Flow + +; OPT: bb4: +; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load +; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken) +; OPT-NEXT: br label %Flow + +; OPT: Flow: +; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ] +; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] +; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi) +; OPT-NEXT: br i1 %2, label %bb9, label %bb1 + +; OPT: bb9: ; preds = %Flow +; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi) +; OPT-NEXT: store volatile i32 7 +; OPT-NEXT: ret void +define void @true_phi_cond_break_loop(i32 %arg) #0 { +bb: + %id = call 
i32 @llvm.amdgcn.workitem.id.x() + %tmp = sub i32 %id, %arg + br label %bb1 + +bb1: ; preds = %Flow, %bb + %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv.next = add i32 %lsr.iv, 1 + %cmp0 = icmp slt i32 %lsr.iv.next, 0 + br i1 %cmp0, label %bb4, label %Flow + +bb4: ; preds = %bb1 + %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %cmp1 = icmp sge i32 %tmp, %load + br label %Flow + +Flow: ; preds = %bb4, %bb1 + %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] + br i1 %tmp3, label %bb9, label %bb1 + +bb9: ; preds = %Flow + store volatile i32 7, i32 addrspace(3)* undef + ret void +} + +; OPT-LABEL: @false_phi_cond_break_loop( +; OPT: bb1: +; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ] +; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] +; OPT-NOT: call +; OPT: br i1 %cmp0, label %bb4, label %Flow + +; OPT: bb4: +; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load +; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken) +; OPT-NEXT: br label %Flow + +; OPT: Flow: +; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ] +; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] +; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi) +; OPT-NEXT: br i1 %1, label %bb9, label %bb1 + +; OPT: bb9: ; preds = %Flow +; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi) +; OPT-NEXT: store volatile i32 7 +; OPT-NEXT: ret void +define void @false_phi_cond_break_loop(i32 %arg) #0 { +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %tmp = sub i32 %id, %arg + br label %bb1 + +bb1: ; preds = %Flow, %bb + %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv.next = add i32 %lsr.iv, 1 + %cmp0 = icmp slt i32 %lsr.iv.next, 0 + br i1 %cmp0, label %bb4, label %Flow + +bb4: ; preds = %bb1 + %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %cmp1 = icmp sge i32 %tmp, %load + br label %Flow + +Flow: ; preds = %bb4, %bb1 + %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ] + br i1 %tmp3, label %bb9, label %bb1 + +bb9: ; preds = %Flow + store volatile i32 7, i32 addrspace(3)* undef + ret void +} + +; Swap order of branches in flow block so that the true phi is +; continue. 
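+; In this arrangement a true phi value continues the loop instead of
+; exiting it, so the annotator must negate the condition before feeding
+; it to if.break; the checks below accordingly expect an xor of %tmp3
+; with true ahead of the llvm.amdgcn.if.break call in the flow block.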
+ +; OPT-LABEL: @invert_true_phi_cond_break_loop( +; OPT: bb1: +; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ] +; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] +; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1 +; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0 +; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow + +; OPT: bb4: +; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load +; OPT-NEXT: br label %Flow + +; OPT: Flow: +; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] +; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] +; OPT-NEXT: %0 = xor i1 %tmp3, true +; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken) +; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1) +; OPT-NEXT: br i1 %2, label %bb9, label %bb1 + +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +define void @invert_true_phi_cond_break_loop(i32 %arg) #0 { +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %tmp = sub i32 %id, %arg + br label %bb1 + +bb1: ; preds = %Flow, %bb + %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv.next = add i32 %lsr.iv, 1 + %cmp0 = icmp slt i32 %lsr.iv.next, 0 + br i1 %cmp0, label %bb4, label %Flow + +bb4: ; preds = %bb1 + %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %cmp1 = icmp sge i32 %tmp, %load + br label %Flow + +Flow: ; preds = %bb4, %bb1 + %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] + br i1 %tmp3, label %bb1, label %bb9 + +bb9: ; preds = %Flow + store volatile i32 7, i32 addrspace(3)* undef + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind }
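For reference, the OPT lines above are checked after structurizing and
annotating the IR. The test's RUN line sits above the quoted hunk, so the
exact invocation here is an assumption, but it is along the lines of:

  opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow \
      llvm/test/CodeGen/AMDGPU/loop_break.ll | FileCheck -check-prefix=OPT llvm/test/CodeGen/AMDGPU/loop_break.ll

A condensed sketch of the placement the fix establishes, distilled from the
@constexpr_phi_cond_break_loop checks above. Labels match the test;
%const.cond is a hypothetical stand-in for the inline constantexpr compare,
and unrelated instructions are elided:

bb1:                                              ; loop header
  %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
  ; The if.break for the constant condition is now created at the header's
  ; terminator, so its result dominates every phi that names it. Before
  ; this fix it was created next to the branch in %Flow, and the i64 phi
  ; there ended up using an if.break defined later in its own block.
  %0 = call i64 @llvm.amdgcn.if.break(i1 %const.cond, i64 %phi.broken)
  br i1 %cmp0, label %bb4, label %Flow

Flow:
  %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]  ; %0 dominates this use
  %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
  br i1 %2, label %bb9, label %bb1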