forked from OSchip/llvm-project
R600: Fix a crash in the AMDILCFGStructurizer
We were calling llvm_unreachable() when failing to optimize the branch into if case. However, it is still possible for us to structurize the CFG by duplicating blocks even if this optimization fails. Reviewed-by: Vincent Lejeune<vljn at ovi.com> llvm-svn: 192813
This commit is contained in:
parent
69f86d199a
commit
b34186ae38
|
@ -1335,8 +1335,74 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
|
||||||
// add initReg = initVal to headBlk
|
// add initReg = initVal to headBlk
|
||||||
|
|
||||||
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
|
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
|
||||||
if (!MigrateTrue || !MigrateFalse)
|
if (!MigrateTrue || !MigrateFalse) {
|
||||||
llvm_unreachable("Extra register needed to handle CFG");
|
// XXX: We have an opportunity here to optimize the "branch into if" case
|
||||||
|
// here. Branch into if looks like this:
|
||||||
|
// entry
|
||||||
|
// / \
|
||||||
|
// diamond_head branch_from
|
||||||
|
// / \ |
|
||||||
|
// diamond_false diamond_true
|
||||||
|
// \ /
|
||||||
|
// done
|
||||||
|
//
|
||||||
|
// The diamond_head block begins the "if" and the diamond_true block
|
||||||
|
// is the block being "branched into".
|
||||||
|
//
|
||||||
|
// If MigrateTrue is true, then TrueBB is the block being "branched into"
|
||||||
|
// and if MigrateFalse is true, then FalseBB is the block being
|
||||||
|
// "branched into"
|
||||||
|
//
|
||||||
|
// Here is the pseudo code for how I think the optimization should work:
|
||||||
|
// 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
|
||||||
|
// 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
|
||||||
|
// 3. Move the branch instruction from diamond_head into its own basic
|
||||||
|
// block (new_block).
|
||||||
|
// 4. Add an unconditional branch from diamond_head to new_block
|
||||||
|
// 5. Replace the branch instruction in branch_from with an unconditional
|
||||||
|
// branch to new_block. If branch_from has multiple predecessors, then
|
||||||
|
// we need to replace the True/False block in the branch
|
||||||
|
// instruction instead of replacing it.
|
||||||
|
// 6. Change the condition of the branch instruction in new_block from
|
||||||
|
// COND to (COND || GPR0)
|
||||||
|
//
|
||||||
|
// In order insert these MOV instruction, we will need to use the
|
||||||
|
// RegisterScavenger. Usually liveness stops being tracked during
|
||||||
|
// the late machine optimization passes, however if we implement
|
||||||
|
// bool TargetRegisterInfo::requiresRegisterScavenging(
|
||||||
|
// const MachineFunction &MF)
|
||||||
|
// and have it return true, liveness will be tracked correctly
|
||||||
|
// by generic optimization passes. We will also need to make sure that
|
||||||
|
// all of our target-specific passes that run after regalloc and before
|
||||||
|
// the CFGStructurizer track liveness and we will need to modify this pass
|
||||||
|
// to correctly track liveness.
|
||||||
|
//
|
||||||
|
// After the above changes, the new CFG should look like this:
|
||||||
|
// entry
|
||||||
|
// / \
|
||||||
|
// diamond_head branch_from
|
||||||
|
// \ /
|
||||||
|
// new_block
|
||||||
|
// / \
|
||||||
|
// diamond_false diamond_true
|
||||||
|
// \ /
|
||||||
|
// done
|
||||||
|
//
|
||||||
|
// Without this optimization, we are forced to duplicate the diamond_true
|
||||||
|
// block and we will end up with a CFG like this:
|
||||||
|
//
|
||||||
|
// entry
|
||||||
|
// / \
|
||||||
|
// diamond_head branch_from
|
||||||
|
// / \ |
|
||||||
|
// diamond_false diamond_true diamond_true (duplicate)
|
||||||
|
// \ / |
|
||||||
|
// done --------------------|
|
||||||
|
//
|
||||||
|
// Duplicating diamond_true can be very costly especially if it has a
|
||||||
|
// lot of instructions.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int NumNewBlk = 0;
|
int NumNewBlk = 0;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
; Test case for a crash in the AMDILCFGStructurizer from a CFG like this:
|
||||||
|
;
|
||||||
|
; entry
|
||||||
|
; / \
|
||||||
|
; diamond_head branch_from
|
||||||
|
; / \ |
|
||||||
|
; diamond_false diamond_true
|
||||||
|
; \ /
|
||||||
|
; done
|
||||||
|
;
|
||||||
|
; When the diamond_true branch had more than 100 instructions.
|
||||||
|
;
|
||||||
|
;
|
||||||
|
|
||||||
|
; CHECK-LABEL: @branch_into_diamond
|
||||||
|
; === entry block:
|
||||||
|
; CHECK: ALU_PUSH_BEFORE
|
||||||
|
; === Branch instruction (IF):
|
||||||
|
; CHECK: JUMP
|
||||||
|
; === branch_from block
|
||||||
|
; CHECK: ALU
|
||||||
|
; === Duplicated diamond_true block (There can be more than one ALU clause):
|
||||||
|
; === XXX: We should be able to optimize this so the basic block is not
|
||||||
|
; === duplicated. See comments in
|
||||||
|
; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf()
|
||||||
|
; CHECK: ALU
|
||||||
|
; === Branch instruction (ELSE):
|
||||||
|
; CHECK: ELSE
|
||||||
|
; === diamond_head block:
|
||||||
|
; CHECK: ALU_PUSH_BEFORE
|
||||||
|
; === Branch instruction (IF):
|
||||||
|
; CHECK: JUMP
|
||||||
|
; === diamond_true block (There can be more than one ALU clause):
|
||||||
|
; ALU
|
||||||
|
; === Branch instruction (ELSE):
|
||||||
|
; CHECK: ELSE
|
||||||
|
; === diamond_false block plus implicit ENDIF
|
||||||
|
; CHECK: ALU_POP_AFTER
|
||||||
|
; === Branch instruction (ENDIF):
|
||||||
|
; CHECK: POP
|
||||||
|
; === done block:
|
||||||
|
; CHECK: ALU
|
||||||
|
; CHECK: MEM_RAT_CACHELESS
|
||||||
|
; CHECK: CF_END
|
||||||
|
|
||||||
|
|
||||||
|
define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
|
||||||
|
entry:
|
||||||
|
%0 = icmp ne i32 %a, 0
|
||||||
|
br i1 %0, label %diamond_head, label %branch_from
|
||||||
|
|
||||||
|
diamond_head:
|
||||||
|
%1 = icmp ne i32 %a, 1
|
||||||
|
br i1 %1, label %diamond_true, label %diamond_false
|
||||||
|
|
||||||
|
branch_from:
|
||||||
|
%2 = add i32 %a, 1
|
||||||
|
br label %diamond_true
|
||||||
|
|
||||||
|
diamond_false:
|
||||||
|
%3 = add i32 %a, 2
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
diamond_true:
|
||||||
|
%4 = phi i32 [%2, %branch_from], [%a, %diamond_head]
|
||||||
|
; This block needs to be > 100 ISA instructions to hit the bug,
|
||||||
|
; so we'll use udiv instructions.
|
||||||
|
%div0 = udiv i32 %a, %b
|
||||||
|
%div1 = udiv i32 %div0, %4
|
||||||
|
%div2 = udiv i32 %div1, 11
|
||||||
|
%div3 = udiv i32 %div2, %a
|
||||||
|
%div4 = udiv i32 %div3, %b
|
||||||
|
%div5 = udiv i32 %div4, %c
|
||||||
|
%div6 = udiv i32 %div5, %div0
|
||||||
|
%div7 = udiv i32 %div6, %div1
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
%5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true]
|
||||||
|
store i32 %5, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue