forked from OSchip/llvm-project
[AMDGPU] Fix clamp bit DAG operand
Summary: - Should use `targetconstant` instead of `constant` operand for the clamp bit, which is expected to be an immediate operand. Under certain conditions, such as when a common `i1 false` constant is used elsewhere and selected before the instruction with the clamp bit, a register operand may be added instead of an immediate one. Use `targetconstant` to enforce that. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59608 llvm-svn: 356608
This commit is contained in:
parent
27ef9518de
commit
eea5177d30
|
@ -931,9 +931,10 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
|
||||||
unsigned Opc = N->getOpcode() == ISD::UADDO ?
|
unsigned Opc = N->getOpcode() == ISD::UADDO ?
|
||||||
AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
|
AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
|
||||||
|
|
||||||
CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
|
CurDAG->SelectNodeTo(
|
||||||
{N->getOperand(0), N->getOperand(1),
|
N, Opc, N->getVTList(),
|
||||||
CurDAG->getConstant(0, {}, MVT::i1)/*clamp bit*/});
|
{N->getOperand(0), N->getOperand(1),
|
||||||
|
CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
|
||||||
}
|
}
|
||||||
|
|
||||||
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
|
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
|
||||||
|
@ -1041,7 +1042,8 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
|
||||||
unsigned SubOp = AMDGPU::V_SUB_I32_e32;
|
unsigned SubOp = AMDGPU::V_SUB_I32_e32;
|
||||||
if (Subtarget->hasAddNoCarry()) {
|
if (Subtarget->hasAddNoCarry()) {
|
||||||
SubOp = AMDGPU::V_SUB_U32_e64;
|
SubOp = AMDGPU::V_SUB_U32_e64;
|
||||||
Opnds.push_back(Zero); // clamp bit
|
Opnds.push_back(
|
||||||
|
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
|
||||||
}
|
}
|
||||||
|
|
||||||
MachineSDNode *MachineSub =
|
MachineSDNode *MachineSub =
|
||||||
|
@ -1119,7 +1121,8 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
|
||||||
unsigned SubOp = AMDGPU::V_SUB_I32_e32;
|
unsigned SubOp = AMDGPU::V_SUB_I32_e32;
|
||||||
if (Subtarget->hasAddNoCarry()) {
|
if (Subtarget->hasAddNoCarry()) {
|
||||||
SubOp = AMDGPU::V_SUB_U32_e64;
|
SubOp = AMDGPU::V_SUB_U32_e64;
|
||||||
Opnds.push_back(Zero); // clamp bit
|
Opnds.push_back(
|
||||||
|
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
|
||||||
}
|
}
|
||||||
|
|
||||||
MachineSDNode *MachineSub
|
MachineSDNode *MachineSub
|
||||||
|
|
|
@ -170,6 +170,28 @@ define amdgpu_kernel void @v_uaddo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}s_uaddo_clamp_bit:
|
||||||
|
; GCN: v_add_{{i|u|co_u}}32_e32
|
||||||
|
; GCN: s_endpgm
|
||||||
|
define amdgpu_kernel void @s_uaddo_clamp_bit(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
|
||||||
|
entry:
|
||||||
|
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
|
||||||
|
%val = extractvalue { i32, i1 } %uadd, 0
|
||||||
|
%carry = extractvalue { i32, i1 } %uadd, 1
|
||||||
|
%c2 = icmp eq i1 %carry, false
|
||||||
|
%cc = icmp eq i32 %a, %b
|
||||||
|
br i1 %cc, label %exit, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
%cout = phi i1 [false, %entry], [%c2, %if]
|
||||||
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
||||||
|
store i1 %cout, i1 addrspace(1)* %carryout
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||||
declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1
|
declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1
|
||||||
|
|
Loading…
Reference in New Issue