StructurizeCFG: Directly invert cmp instructions

The most common case for a branch condition is
a single-use compare. Directly invert the compare's
predicate rather than adding xor i1 %cond, true
instructions that the DAG will have to fold later.

This produces nicer-to-read structurizer output.

This produces some incidental codegen changes,
because the DAG swaps branch conditions itself
and then does a poor job of dealing with the
resulting inverts.
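
To illustrate (a sketch using the shape of the first test added below, with that test's value names): where the structurizer used to materialize an explicit invert of a single-use compare, it now flips the compare's predicate directly.

Before:
  %cmp0 = fcmp olt float %arg0, %arg1
  %0 = xor i1 %cmp0, true
  br i1 %0, label %end.loop, label %Flow

After:
  %cmp0 = fcmp uge float %arg0, %arg1
  br i1 %cmp0, label %end.loop, label %Flow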

llvm-svn: 300732
Author: Matt Arsenault
Date:   2017-04-19 18:29:07 +0000
Commit: d3406bc45c (parent 5945447d84)

8 changed files with 184 additions and 112 deletions


@@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) {
   if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
     // Third: Check all the users for an invert
     BasicBlock *Parent = Inst->getParent();
-    for (User *U : Condition->users())
-      if (Instruction *I = dyn_cast<Instruction>(U))
+    for (User *U : Condition->users()) {
+      if (Instruction *I = dyn_cast<Instruction>(U)) {
         if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
           return I;
+      }
+    }
+
+    // Avoid creating a new instruction in the common case of a compare.
+    if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
+      if (Cmp->hasOneUse()) {
+        Cmp->setPredicate(Cmp->getInversePredicate());
+        return Cmp;
+      }
+    }
+
     // Last option: Create a new instruction
     return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
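
Two details of the new shortcut are worth noting. setPredicate mutates the compare in place, which is only safe here because hasOneUse() guarantees the branch is the sole user; a multi-use compare keeps the xor path (the second test in the new file below checks exactly that). For reference, the predicate inversions exercised by the updated tests (note that getInversePredicate flips ordered float predicates to their unordered counterparts and vice versa):

  icmp eq  ->  icmp ne
  icmp slt ->  icmp sge
  fcmp olt ->  fcmp uge
  fcmp ult ->  fcmp oge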


@@ -10,7 +10,7 @@
 ; OPT: bb4:
 ; OPT: load volatile
-; OPT: xor i1 %cmp1
+; OPT: %cmp1 = icmp sge i32 %tmp, %load
 ; OPT: call i64 @llvm.amdgcn.if.break(
 ; OPT: br label %Flow


@@ -9,18 +9,19 @@
 ; StructurizeCFG.
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
-; IR: %2 = extractvalue { i1, i64 } %1, 0
-; IR: %3 = extractvalue { i1, i64 } %1, 1
-; IR: br i1 %2, label %LeafBlock1, label %Flow
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: %1 = extractvalue { i1, i64 } %0, 0
+; IR: %2 = extractvalue { i1, i64 } %0, 1
+; IR: br i1 %1, label %LeafBlock1, label %Flow

 ; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: %7 = extractvalue { i1, i64 } %6, 0
-; IR: %8 = extractvalue { i1, i64 } %6, 1
-; IR: br i1 %7, label %LeafBlock, label %Flow1
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: %6 = extractvalue { i1, i64 } %5, 0
+; IR: %7 = extractvalue { i1, i64 } %5, 1
+; IR: br i1 %6, label %LeafBlock, label %Flow1

 ; IR: LeafBlock:
 ; IR: br label %Flow1
@@ -29,32 +30,32 @@
 ; IR: br label %Flow{{$}}

 ; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: %13 = extractvalue { i1, i64 } %12, 0
-; IR: %14 = extractvalue { i1, i64 } %12, 1
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: [[IF:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: %10 = extractvalue { i1, i64 } [[IF]], 0
+; IR: %11 = extractvalue { i1, i64 } [[IF]], 1
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock

 ; IR: exit0:
 ; IR: store volatile i32 9, i32 addrspace(1)* undef
 ; IR: br label %UnifiedReturnBlock

 ; IR: Flow1:
-; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2

 ; IR: exit1:
 ; IR: store volatile i32 17, i32 addrspace(3)* undef
 ; IR: br label %Flow2

 ; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf(i64 %11)
 ; IR: ret void
@@ -64,11 +65,9 @@
 ; GCN: s_xor_b64

-; FIXME: Why is this compare essentially repeated?
-; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
-; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]]
+; GCN: ; %LeafBlock
+; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG:v[0-9]+]]
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1

 ; GCN: ; %Flow1
 ; GCN-NEXT: s_or_b64 exec, exec
@@ -126,14 +125,15 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }

 ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)

-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)

-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedUnreachableBlock

 ; IR: UnifiedUnreachableBlock:
@@ -181,51 +181,49 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }

 ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
-; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
+; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2
 ; IR: llvm.amdgcn.if
 ; IR: br i1

 ; IR: {{^}}Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: br i1 %7, label %LeafBlock, label %Flow1
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: br i1 %6, label %LeafBlock, label %Flow1

 ; IR: {{^}}LeafBlock:
-; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
-; IR: %9 = xor i1 %divergent.cond1, true
+; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1
 ; IR: br label %Flow1

 ; IR: LeafBlock1:
-; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
-; IR: %10 = xor i1 %uniform.cond0, true
+; IR: %uniform.cond0 = icmp ne i32 %arg3, 2
 ; IR: br label %Flow

 ; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock

 ; IR: exit0:
 ; IR: store volatile i32 9, i32 addrspace(1)* undef
 ; IR: br label %UnifiedReturnBlock

 ; IR: {{^}}Flow1:
-; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2

 ; IR: exit1:
 ; IR: store volatile i32 17, i32 addrspace(3)* undef
 ; IR: br label %Flow2

 ; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf(i64 %11)
 ; IR: ret void
 define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
 entry:
@@ -264,17 +262,18 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }

 ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
-; IR: br i1 %2, label %LeafBlock1, label %Flow
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: br i1 %1, label %LeafBlock1, label %Flow

 ; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)

-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
 define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
 entry:
@@ -314,13 +313,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
 ; IR: Flow2:
-; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
-; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %20)
+; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
+; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %17)

 ; IR: UnifiedReturnBlock:
-; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %15)
+; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %12)
 ; IR: ret float %UnifiedRetVal
 define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
 entry:
@@ -387,31 +386,32 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }

 ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)

 ; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)

 ; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock

 ; IR: exit0:
 ; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
 ; IR-NEXT: br label %UnifiedReturnBlock

 ; IR: Flow1:
-; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2

 ; IR: exit1:
 ; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
@@ -419,7 +419,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-NEXT: br label %Flow2

 ; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
 entry:
@@ -475,7 +475,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-NEXT: br label %Flow2

 ; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
 entry:
@@ -622,15 +622,15 @@ uniform.ret:
 ; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
 ; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
-; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
-; IR: br i1 %8, label %uniform.if, label %Flow2
+; IR: %6 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
+; IR: br i1 %6, label %uniform.if, label %Flow2

 ; IR: Flow: ; preds = %uniform.then, %uniform.if
-; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
-; IR: br i1 %11, label %uniform.endif, label %uniform.ret0
+; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ]
+; IR: br i1 %7, label %uniform.endif, label %uniform.ret0

 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %5)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
 entry:


@@ -133,9 +133,9 @@ bb23: ; preds = %bb10
 ; IR: Flow1:
 ; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
-; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
+; IR-NEXT: %13 = phi <4 x i32> [ %28, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %14 = phi i32 [ %29, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %15 = phi i1 [ %30, %Flow6 ], [ false, %bb14 ]
 ; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
 ; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
 ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
@@ -144,9 +144,9 @@ bb23: ; preds = %bb10
 ; IR: Flow2:
 ; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
-; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
+; IR-NEXT: %19 = phi <4 x i32> [ %28, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %20 = phi i32 [ %29, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %21 = phi i1 [ %30, %Flow5 ], [ false, %bb16 ]
 ; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
 ; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
 ; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
@@ -156,16 +156,15 @@ bb23: ; preds = %bb10
 ; IR: bb21:
 ; IR: %tmp12 = icmp slt i32 %tmp11, 9
-; IR-NEXT: %27 = xor i1 %tmp12, true
-; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
+; IR-NEXT: %27 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken)
 ; IR-NEXT: br label %Flow3

 ; IR: Flow3:
 ; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
-; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
-; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
-; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
-; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
+; IR-NEXT: %loop.phi9 = phi i64 [ %27, %bb21 ], [ %loop.phi10, %Flow2 ]
+; IR-NEXT: %28 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
+; IR-NEXT: %29 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
+; IR-NEXT: %30 = phi i1 [ %tmp12, %bb21 ], [ %21, %Flow2 ]
 ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
 ; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4


@@ -56,7 +56,7 @@ ret.bb: ; preds = %else, %main_body
 }

 ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]]

 ; GCN: ; BB#{{[0-9]+}}: ; %else
 ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc


@@ -0,0 +1,60 @@
+; RUN: opt -S -structurizecfg %s | FileCheck %s
+
+; CHECK-LABEL: @directly_invert_compare_condition_jump_into_loop(
+; CHECK: %cmp0 = fcmp uge float %arg0, %arg1
+; CHECK-NEXT: br i1 %cmp0, label %end.loop, label %Flow
+define void @directly_invert_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i
+  store i32 %i, i32 addrspace(1)* %ptr, align 4
+  %cmp0 = fcmp olt float %arg0, %arg1
+  br i1 %cmp0, label %mid.loop, label %end.loop
+
+mid.loop:
+  store i32 333, i32 addrspace(1)* %out, align 4
+  br label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @invert_multi_use_compare_condition_jump_into_loop(
+; CHECK: %cmp0 = fcmp olt float %arg0, %arg1
+; CHECK: store volatile i1 %cmp0, i1 addrspace(1)* undef
+; CHECK: %0 = xor i1 %cmp0, true
+; CHECK-NEXT: br i1 %0, label %end.loop, label %Flow
+define void @invert_multi_use_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i
+  store i32 %i, i32 addrspace(1)* %ptr, align 4
+  %cmp0 = fcmp olt float %arg0, %arg1
+  store volatile i1 %cmp0, i1 addrspace(1)* undef
+  br i1 %cmp0, label %mid.loop, label %end.loop
+
+mid.loop:
+  store i32 333, i32 addrspace(1)* %out, align 4
+  br label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind }


@@ -11,8 +11,8 @@ bb:
 bb3: ; preds = %bb7, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
   %tmp4 = fcmp ult float %arg1, 3.500000e+00
-; CHECK: %0 = xor i1 %tmp4, true
-; CHECK: br i1 %0, label %bb5, label %Flow
+; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00
+; CHECK: br i1 %tmp4, label %bb5, label %Flow
   br i1 %tmp4, label %bb7, label %bb5

 ; CHECK: bb5:
@@ -22,7 +22,8 @@ bb5: ; preds = %bb3
   br i1 %tmp6, label %bb10, label %bb7

 ; CHECK: Flow:
-; CHECK: br i1 %3, label %bb7, label %Flow1
+; CHECK: %1 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ]
+; CHECK-NEXT: br i1 %1, label %bb7, label %Flow1

 ; CHECK: bb7
 bb7: ; preds = %bb5, %bb3
@@ -32,9 +33,10 @@ bb7: ; preds = %bb5, %bb3
   br i1 %tmp9, label %bb3, label %bb10

 ; CHECK: Flow1:
-; CHECK: br i1 %7, label %bb10, label %bb3
+; CHECK: %4 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ]
+; CHECK-NEXT: br i1 %4, label %bb10, label %bb3

-; CHECK: bb10
+; CHECK: bb10:
 bb10: ; preds = %bb7, %bb5
   %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
   store i32 %tmp11, i32 addrspace(1)* %arg, align 4


@@ -59,7 +59,8 @@ for.end: ; preds = %for.body.1, %if.the
 ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2

 ; CHECK: for.body.1:
-; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
+; CHECK: %cmp1.5 = icmp ne i32 %tmp22, %K1
+; CHECK-NEXT: br i1 %cmp1.5, label %for.body.6, label %Flow3
 for.body.1: ; preds = %if.then, %lor.lhs.false
   %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
   %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]