forked from OSchip/llvm-project
R600: Fix a crash in the AMDILCFGStructurizer
We were calling llvm_unreachable() when we failed to optimize the "branch into if" case. However, it is still possible for us to structurize the CFG by duplicating blocks even if this optimization fails. Reviewed-by: Vincent Lejeune <vljn at ovi.com> llvm-svn: 192813
This commit is contained in:
parent
69f86d199a
commit
b34186ae38
|
@ -1335,8 +1335,74 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
|
|||
// add initReg = initVal to headBlk
|
||||
|
||||
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
|
||||
if (!MigrateTrue || !MigrateFalse)
|
||||
llvm_unreachable("Extra register needed to handle CFG");
|
||||
if (!MigrateTrue || !MigrateFalse) {
|
||||
// XXX: We have an opportunity to optimize the "branch into if" case
|
||||
// here. Branch into if looks like this:
|
||||
// entry
|
||||
// / \
|
||||
// diamond_head branch_from
|
||||
// / \ |
|
||||
// diamond_false diamond_true
|
||||
// \ /
|
||||
// done
|
||||
//
|
||||
// The diamond_head block begins the "if" and the diamond_true block
|
||||
// is the block being "branched into".
|
||||
//
|
||||
// If MigrateTrue is true, then TrueBB is the block being "branched into"
|
||||
// and if MigrateFalse is true, then FalseBB is the block being
|
||||
// "branched into"
|
||||
//
|
||||
// Here is the pseudo code for how I think the optimization should work:
|
||||
// 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
|
||||
// 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
|
||||
// 3. Move the branch instruction from diamond_head into its own basic
|
||||
// block (new_block).
|
||||
// 4. Add an unconditional branch from diamond_head to new_block
|
||||
// 5. Replace the branch instruction in branch_from with an unconditional
|
||||
// branch to new_block. If branch_from has multiple predecessors, then
|
||||
// we need to replace the True/False block in the branch
|
||||
// instruction instead of replacing the whole instruction.
|
||||
// 6. Change the condition of the branch instruction in new_block from
|
||||
// COND to (COND || GPR0)
|
||||
//
|
||||
// In order to insert these MOV instructions, we will need to use the
|
||||
// RegisterScavenger. Usually liveness stops being tracked during
|
||||
// the late machine optimization passes, however if we implement
|
||||
// bool TargetRegisterInfo::requiresRegisterScavenging(
|
||||
// const MachineFunction &MF)
|
||||
// and have it return true, liveness will be tracked correctly
|
||||
// by generic optimization passes. We will also need to make sure that
|
||||
// all of our target-specific passes that run after regalloc and before
|
||||
// the CFGStructurizer track liveness and we will need to modify this pass
|
||||
// to correctly track liveness.
|
||||
//
|
||||
// After the above changes, the new CFG should look like this:
|
||||
// entry
|
||||
// / \
|
||||
// diamond_head branch_from
|
||||
// \ /
|
||||
// new_block
|
||||
// / \
|
||||
// diamond_false diamond_true
|
||||
// \ /
|
||||
// done
|
||||
//
|
||||
// Without this optimization, we are forced to duplicate the diamond_true
|
||||
// block and we will end up with a CFG like this:
|
||||
//
|
||||
// entry
|
||||
// / \
|
||||
// diamond_head branch_from
|
||||
// / \ |
|
||||
// diamond_false diamond_true diamond_true (duplicate)
|
||||
// \ / |
|
||||
// done --------------------|
|
||||
//
|
||||
// Duplicating diamond_true can be very costly especially if it has a
|
||||
// lot of instructions.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int NumNewBlk = 0;
|
||||
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
; Test case for a crash in the AMDILCFGStructurizer from a CFG like this:
|
||||
;
|
||||
; entry
|
||||
; / \
|
||||
; diamond_head branch_from
|
||||
; / \ |
|
||||
; diamond_false diamond_true
|
||||
; \ /
|
||||
; done
|
||||
;
|
||||
; The crash occurred when the diamond_true block had more than 100 instructions.
|
||||
;
|
||||
;
|
||||
|
||||
; CHECK-LABEL: @branch_into_diamond
|
||||
; === entry block:
|
||||
; CHECK: ALU_PUSH_BEFORE
|
||||
; === Branch instruction (IF):
|
||||
; CHECK: JUMP
|
||||
; === branch_from block
|
||||
; CHECK: ALU
|
||||
; === Duplicated diamond_true block (There can be more than one ALU clause):
|
||||
; === XXX: We should be able to optimize this so the basic block is not
|
||||
; === duplicated. See comments in
|
||||
; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf()
|
||||
; CHECK: ALU
|
||||
; === Branch instruction (ELSE):
|
||||
; CHECK: ELSE
|
||||
; === diamond_head block:
|
||||
; CHECK: ALU_PUSH_BEFORE
|
||||
; === Branch instruction (IF):
|
||||
; CHECK: JUMP
|
||||
; === diamond_true block (There can be more than one ALU clause):
|
||||
; ALU
|
||||
; === Branch instruction (ELSE):
|
||||
; CHECK: ELSE
|
||||
; === diamond_false block plus implicit ENDIF
|
||||
; CHECK: ALU_POP_AFTER
|
||||
; === Branch instruction (ENDIF):
|
||||
; CHECK: POP
|
||||
; === done block:
|
||||
; CHECK: ALU
|
||||
; CHECK: MEM_RAT_CACHELESS
|
||||
; CHECK: CF_END
|
||||
|
||||
|
||||
define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
|
||||
entry:
|
||||
%0 = icmp ne i32 %a, 0
|
||||
br i1 %0, label %diamond_head, label %branch_from
|
||||
|
||||
diamond_head:
|
||||
%1 = icmp ne i32 %a, 1
|
||||
br i1 %1, label %diamond_true, label %diamond_false
|
||||
|
||||
branch_from:
|
||||
%2 = add i32 %a, 1
|
||||
br label %diamond_true
|
||||
|
||||
diamond_false:
|
||||
%3 = add i32 %a, 2
|
||||
br label %done
|
||||
|
||||
diamond_true:
|
||||
%4 = phi i32 [%2, %branch_from], [%a, %diamond_head]
|
||||
; This block needs to be > 100 ISA instructions to hit the bug,
|
||||
; so we'll use udiv instructions.
|
||||
%div0 = udiv i32 %a, %b
|
||||
%div1 = udiv i32 %div0, %4
|
||||
%div2 = udiv i32 %div1, 11
|
||||
%div3 = udiv i32 %div2, %a
|
||||
%div4 = udiv i32 %div3, %b
|
||||
%div5 = udiv i32 %div4, %c
|
||||
%div6 = udiv i32 %div5, %div0
|
||||
%div7 = udiv i32 %div6, %div1
|
||||
br label %done
|
||||
|
||||
done:
|
||||
%5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true]
|
||||
store i32 %5, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue