AMDGPU: Fix backwards s_cselect_* operands

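The vector equivalent (V_CNDMASK_B32) takes its source operands in
reverse order (false value first, then true value), but the scalar
S_CSELECT_B32/S_CSELECT_B64 instructions do not (true value first).
The passes that use these hooks aren't enabled by default, so this
doesn't change the code generated in the default configuration.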
This commit is contained in:
Matt Arsenault 2020-05-19 10:16:45 -04:00
parent 2ee4ec6b6f
commit 833996cef1
2 changed files with 29 additions and 17 deletions

View File

@ -2348,14 +2348,17 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned DstSize = RI.getRegSizeInBits(*DstRC);
if (DstSize == 32) {
unsigned SelOp = Pred == SCC_TRUE ?
AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
MachineInstr *Select;
if (Pred == SCC_TRUE) {
Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg)
.addReg(TrueReg)
.addReg(FalseReg);
} else {
// Instruction's operands are backwards from what is expected.
MachineInstr *Select =
BuildMI(MBB, I, DL, get(SelOp), DstReg)
Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg)
.addReg(FalseReg)
.addReg(TrueReg);
}
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
return;
@ -2364,8 +2367,8 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
if (DstSize == 64 && Pred == SCC_TRUE) {
MachineInstr *Select =
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
.addReg(FalseReg)
.addReg(TrueReg);
.addReg(TrueReg)
.addReg(FalseReg);
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
return;
@ -2416,10 +2419,19 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned SubIdx = SubIndices[Idx];
MachineInstr *Select =
MachineInstr *Select;
if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
Select =
BuildMI(MBB, I, DL, get(SelOp), DstElt)
.addReg(FalseReg, 0, SubIdx)
.addReg(TrueReg, 0, SubIdx);
} else {
Select =
BuildMI(MBB, I, DL, get(SelOp), DstElt)
.addReg(TrueReg, 0, SubIdx)
.addReg(FalseReg, 0, SubIdx);
}
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
fixImplicitOperands(*Select);

View File

@ -246,7 +246,7 @@ endif:
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[VAL]], [[ADD]]
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
entry:
%v = load i32, i32 addrspace(4)* %in
@ -362,7 +362,7 @@ endif:
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}}
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp eq i32 %cond, 0
@ -383,7 +383,7 @@ done:
; GCN-LABEL: {{^}}ifcvt_undef_scc:
; GCN: {{^}}; %bb.0:
; GCN-NEXT: s_load_dwordx2
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
br i1 undef, label %else, label %if