forked from OSchip/llvm-project
R600/SI: Match integer min / max instructions
llvm-svn: 222015
This commit is contained in:
parent
b9987293e6
commit
d28a7fde32
|
@ -1000,19 +1000,14 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
|||
}
|
||||
|
||||
/// \brief Generate Min/Max node
|
||||
SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const {
|
||||
if (VT != MVT::f32 &&
|
||||
(VT != MVT::f64 ||
|
||||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS))
|
||||
return SDValue();
|
||||
|
||||
SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const {
|
||||
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
|
||||
return SDValue();
|
||||
|
||||
|
@ -1057,6 +1052,45 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// \brief Generate Min/Max node
|
||||
SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const {
|
||||
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
|
||||
return SDValue();
|
||||
|
||||
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
switch (CCOpcode) {
|
||||
case ISD::SETULE:
|
||||
case ISD::SETULT: {
|
||||
unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
|
||||
return DAG.getNode(Opc, DL, VT, LHS, RHS);
|
||||
}
|
||||
case ISD::SETLE:
|
||||
case ISD::SETLT: {
|
||||
unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
|
||||
return DAG.getNode(Opc, DL, VT, LHS, RHS);
|
||||
}
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE: {
|
||||
unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
|
||||
return DAG.getNode(Opc, DL, VT, LHS, RHS);
|
||||
}
|
||||
case ISD::SETUGE:
|
||||
case ISD::SETUGT: {
|
||||
unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
|
||||
return DAG.getNode(Opc, DL, VT, LHS, RHS);
|
||||
}
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Op);
|
||||
|
@ -2117,20 +2151,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
SDLoc DL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
SDValue True = N->getOperand(2);
|
||||
SDValue False = N->getOperand(3);
|
||||
SDValue CC = N->getOperand(4);
|
||||
if (VT == MVT::f32 ||
|
||||
(VT == MVT::f64 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
SDValue True = N->getOperand(2);
|
||||
SDValue False = N->getOperand(3);
|
||||
SDValue CC = N->getOperand(4);
|
||||
|
||||
return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT: {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
if (Cond.getOpcode() == ISD::SETCC) {
|
||||
SDLoc DL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
SDValue LHS = Cond.getOperand(0);
|
||||
SDValue RHS = Cond.getOperand(1);
|
||||
SDValue CC = Cond.getOperand(2);
|
||||
|
@ -2138,8 +2177,17 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
SDValue True = N->getOperand(1);
|
||||
SDValue False = N->getOperand(2);
|
||||
|
||||
if (VT == MVT::f32 ||
|
||||
(VT == MVT::f64 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
|
||||
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
}
|
||||
|
||||
return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
// TODO: Implement min / max Evergreen instructions.
|
||||
if (VT == MVT::i32 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
|
@ -140,14 +140,23 @@ public:
|
|||
|
||||
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// Generate a min/max node from the operands of a compare-and-select.
///
/// LHS / RHS are the compared values, True / False the selected values and
/// CC the condition code operand.
/// NOTE(review): in this diff view the declaration appears next to
/// CombineFMinMax with an identical parameter list and looks like the
/// pre-rename spelling of it - confirm before relying on this name.
SDValue CombineMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const;
|
||||
/// Min/max combine used for floating-point selects.
///
/// LHS / RHS are the compared values, True / False the selected values and
/// CC the condition code operand. The visible part of the definition returns
/// an empty SDValue unless (LHS, RHS) equals (True, False) or (False, True).
/// The visible callers invoke it for f32, and for f64 on SOUTHERN_ISLANDS or
/// newer subtargets.
/// NOTE(review): the remainder of the definition is outside this view.
SDValue CombineFMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const;
|
||||
/// Min/max combine for integer selects.
///
/// LHS / RHS are the compared values, True / False the selected values and
/// CC the condition code operand. Returns an AMDGPUISD::SMIN, SMAX, UMIN or
/// UMAX node over (LHS, RHS) when (LHS, RHS) equals (True, False) or
/// (False, True) and CC is a signed or unsigned less/greater predicate;
/// otherwise returns an empty SDValue. The visible caller invokes it for i32
/// on SOUTHERN_ISLANDS or newer subtargets.
SDValue CombineIMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
const char* getTargetNodeName(unsigned Opcode) const override;
|
||||
|
||||
virtual SDNode *PostISelFolding(MachineSDNode *N,
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @v_test_imax_sge_i32
|
||||
; SI: v_max_i32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the signed maximum (icmp sge + select) and
; stores it to the matching slot of %out.
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp sge i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_imax_sge_i32
|
||||
; SI: s_max_i32
|
||||
; Stores the signed maximum of the two i32 arguments (icmp sge + select)
; through %out.
define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp sge i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_imax_sgt_i32
|
||||
; SI: v_max_i32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the signed maximum (icmp sgt + select) and
; stores it to the matching slot of %out.
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp sgt i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_imax_sgt_i32
|
||||
; SI: s_max_i32
|
||||
; Stores the signed maximum of the two i32 arguments (icmp sgt + select)
; through %out.
define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp sgt i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_umax_uge_i32
|
||||
; SI: v_max_u32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the unsigned maximum (icmp uge + select) and
; stores it to the matching slot of %out.
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp uge i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_umax_uge_i32
|
||||
; SI: s_max_u32
|
||||
; Stores the unsigned maximum of the two i32 arguments (icmp uge + select)
; through %out.
define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp uge i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_umax_ugt_i32
|
||||
; SI: v_max_u32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the unsigned maximum (icmp ugt + select) and
; stores it to the matching slot of %out.
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp ugt i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_umax_ugt_i32
|
||||
; SI: s_max_u32
|
||||
; Stores the unsigned maximum of the two i32 arguments (icmp ugt + select)
; through %out.
define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp ugt i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
@ -0,0 +1,99 @@
|
|||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @v_test_imin_sle_i32
|
||||
; SI: v_min_i32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the signed minimum (icmp sle + select) and
; stores it to the matching slot of %out.
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp sle i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_imin_sle_i32
|
||||
; SI: s_min_i32
|
||||
; Stores the signed minimum of the two i32 arguments (icmp sle + select)
; through %out.
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp sle i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_imin_slt_i32
|
||||
; SI: v_min_i32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the signed minimum (icmp slt + select) and
; stores it to the matching slot of %out.
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp slt i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_imin_slt_i32
|
||||
; SI: s_min_i32
|
||||
; Stores the signed minimum of the two i32 arguments (icmp slt + select)
; through %out.
define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp slt i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_umin_ule_i32
|
||||
; SI: v_min_u32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the unsigned minimum (icmp ule + select) and
; stores it to the matching slot of %out.
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp ule i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_umin_ule_i32
|
||||
; SI: s_min_u32
|
||||
; Stores the unsigned minimum of the two i32 arguments (icmp ule + select)
; through %out.
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp ule i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @v_test_umin_ult_i32
|
||||
; SI: v_min_u32_e32
|
||||
; Loads one i32 from %aptr and one from %bptr, both indexed by the result of
; @llvm.r600.read.tidig.x, keeps the unsigned minimum (icmp ult + select) and
; stores it to the matching slot of %out.
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i32 addrspace(1)* %bptr, i32 %lane
  %out.addr = getelementptr i32 addrspace(1)* %out, i32 %lane
  %lhs = load i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32 addrspace(1)* %b.addr, align 4
  %pick.lhs = icmp ult i32 %lhs, %rhs
  %result = select i1 %pick.lhs, i32 %lhs, i32 %rhs
  store i32 %result, i32 addrspace(1)* %out.addr, align 4
  ret void
}
|
||||
|
||||
; FUNC-LABEL: @s_test_umin_ult_i32
|
||||
; SI: s_min_u32
|
||||
; Stores the unsigned minimum of the two i32 arguments (icmp ult + select)
; through %out.
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %pick.a = icmp ult i32 %a, %b
  %result = select i1 %pick.a, i32 %a, i32 %b
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
Loading…
Reference in New Issue