forked from OSchip/llvm-project
AMDGPU: Fix backwards s_cselect_* operands
The vector equivalent (V_CNDMASK_B32) takes its operands in reversed order, but the scalar version (S_CSELECT_*) does not. The passes that use these hooks aren't enabled by default, so this doesn't change code generation unless those passes are explicitly enabled.
This commit is contained in:
parent
2ee4ec6b6f
commit
833996cef1
|
@@ -2348,14 +2348,17 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
|
||||||
unsigned DstSize = RI.getRegSizeInBits(*DstRC);
|
unsigned DstSize = RI.getRegSizeInBits(*DstRC);
|
||||||
|
|
||||||
if (DstSize == 32) {
|
if (DstSize == 32) {
|
||||||
unsigned SelOp = Pred == SCC_TRUE ?
|
MachineInstr *Select;
|
||||||
AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
|
if (Pred == SCC_TRUE) {
|
||||||
|
Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg)
|
||||||
// Instruction's operands are backwards from what is expected.
|
.addReg(TrueReg)
|
||||||
MachineInstr *Select =
|
.addReg(FalseReg);
|
||||||
BuildMI(MBB, I, DL, get(SelOp), DstReg)
|
} else {
|
||||||
.addReg(FalseReg)
|
// Instruction's operands are backwards from what is expected.
|
||||||
.addReg(TrueReg);
|
Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg)
|
||||||
|
.addReg(FalseReg)
|
||||||
|
.addReg(TrueReg);
|
||||||
|
}
|
||||||
|
|
||||||
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
||||||
return;
|
return;
|
||||||
|
@@ -2364,8 +2367,8 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
|
||||||
if (DstSize == 64 && Pred == SCC_TRUE) {
|
if (DstSize == 64 && Pred == SCC_TRUE) {
|
||||||
MachineInstr *Select =
|
MachineInstr *Select =
|
||||||
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
|
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
|
||||||
.addReg(FalseReg)
|
.addReg(TrueReg)
|
||||||
.addReg(TrueReg);
|
.addReg(FalseReg);
|
||||||
|
|
||||||
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
||||||
return;
|
return;
|
||||||
|
@@ -2416,10 +2419,19 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
|
||||||
|
|
||||||
unsigned SubIdx = SubIndices[Idx];
|
unsigned SubIdx = SubIndices[Idx];
|
||||||
|
|
||||||
MachineInstr *Select =
|
MachineInstr *Select;
|
||||||
BuildMI(MBB, I, DL, get(SelOp), DstElt)
|
if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
|
||||||
.addReg(FalseReg, 0, SubIdx)
|
Select =
|
||||||
.addReg(TrueReg, 0, SubIdx);
|
BuildMI(MBB, I, DL, get(SelOp), DstElt)
|
||||||
|
.addReg(FalseReg, 0, SubIdx)
|
||||||
|
.addReg(TrueReg, 0, SubIdx);
|
||||||
|
} else {
|
||||||
|
Select =
|
||||||
|
BuildMI(MBB, I, DL, get(SelOp), DstElt)
|
||||||
|
.addReg(TrueReg, 0, SubIdx)
|
||||||
|
.addReg(FalseReg, 0, SubIdx);
|
||||||
|
}
|
||||||
|
|
||||||
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
|
||||||
fixImplicitOperands(*Select);
|
fixImplicitOperands(*Select);
|
||||||
|
|
||||||
|
|
|
@@ -246,7 +246,7 @@ endif:
|
||||||
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
|
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
|
||||||
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
|
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
|
||||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
||||||
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
|
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[VAL]], [[ADD]]
|
||||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
|
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
|
||||||
entry:
|
entry:
|
||||||
%v = load i32, i32 addrspace(4)* %in
|
%v = load i32, i32 addrspace(4)* %in
|
||||||
|
@@ -362,7 +362,7 @@ endif:
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select:
|
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select:
|
||||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||||
; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}}
|
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
|
||||||
define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) {
|
define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
%cmp0 = icmp eq i32 %cond, 0
|
%cmp0 = icmp eq i32 %cond, 0
|
||||||
|
@@ -383,7 +383,7 @@ done:
|
||||||
; GCN-LABEL: {{^}}ifcvt_undef_scc:
|
; GCN-LABEL: {{^}}ifcvt_undef_scc:
|
||||||
; GCN: {{^}}; %bb.0:
|
; GCN: {{^}}; %bb.0:
|
||||||
; GCN-NEXT: s_load_dwordx2
|
; GCN-NEXT: s_load_dwordx2
|
||||||
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0
|
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
|
||||||
define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) {
|
define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) {
|
||||||
entry:
|
entry:
|
||||||
br i1 undef, label %else, label %if
|
br i1 undef, label %else, label %if
|
||||||
|
|
Loading…
Reference in New Issue