SelectionDAG: Match min/max if the scalar operation is legal

llvm-svn: 255388
This commit is contained in:
Matt Arsenault 2015-12-11 23:16:47 +00:00
parent cd8664c3c2
commit fabab4b7dd
7 changed files with 361 additions and 87 deletions

View File

@ -111,6 +111,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM: case ISD::FMAXNUM:
case ISD::FMINNAN: case ISD::FMINNAN:
case ISD::FMAXNAN: case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::FPOW: case ISD::FPOW:
case ISD::FREM: case ISD::FREM:
@ -1984,6 +1988,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM: case ISD::FMAXNUM:
case ISD::FMINNAN: case ISD::FMINNAN:
case ISD::FMAXNAN: case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
Res = WidenVecRes_Binary(N); Res = WidenVecRes_Binary(N);
break; break;

View File

@ -2470,9 +2470,18 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
EVT VT = ValueVTs[0]; EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext(); LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo(); auto &TLI = DAG.getTargetLoweringInfo();
while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT); VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
Value *LHS, *RHS; Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE; ISD::NodeType Opc = ISD::DELETED_NODE;
@ -2486,11 +2495,17 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: case SPNB_RETURNS_ANY: {
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
: ISD::FMINNAN; Opc = ISD::FMINNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
Opc = ISD::FMINNAN;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
ISD::FMINNUM : ISD::FMINNAN;
break; break;
} }
}
break; break;
case SPF_FMAXNUM: case SPF_FMAXNUM:
switch (SPR.NaNBehavior) { switch (SPR.NaNBehavior) {
@ -2498,18 +2513,27 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY: case SPNB_RETURNS_ANY:
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM
: ISD::FMAXNAN; if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
Opc = ISD::FMAXNAN;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
ISD::FMAXNUM : ISD::FMAXNAN;
break; break;
} }
break; break;
default: break; default: break;
} }
if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && if (Opc != ISD::DELETED_NODE &&
// If the underlying comparison instruction is used by any other instruction, (TLI.isOperationLegalOrCustom(Opc, VT) ||
// the consumed instructions won't be destroyed, so it is not profitable (UseScalarMinMax &&
// to convert to a min/max. TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
// instruction, the consumed instructions won't be destroyed, so it is
// not profitable to convert to a min/max.
cast<SelectInst>(&I)->getCondition()->hasOneUse()) { cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
OpCode = Opc; OpCode = Opc;
LHSVal = getValue(LHS); LHSVal = getValue(LHS);

View File

@ -87,6 +87,46 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; EG: MAX
define void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
%a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
%b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
%cmp = fcmp ogt <1 x float> %a, %b
%val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
store <1 x float> %val, <1 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32
; SI-NONAN: v_max_f32_e32
; SI-NONAN: v_max_f32_e32
; SI-NONAN: v_max_f32_e32
define void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
%a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
%b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
%cmp = fcmp ogt <3 x float> %a, %b
%val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
store <3 x float> %val, <3 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use ; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}

View File

@ -96,6 +96,69 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
%a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
%b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
%cmp = fcmp ult <1 x float> %a, %b
%val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
store <1 x float> %val, <1 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; SI: buffer_load_dwordx2
; SI: buffer_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
%a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
%b = load <2 x float>, <2 x float> addrspace(1)* %gep.1
%cmp = fcmp ult <2 x float> %a, %b
%val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
store <2 x float> %val, <2 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
%a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
%b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
%cmp = fcmp ult <3 x float> %a, %b
%val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
store <3 x float> %val, <3 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use ; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

View File

@ -2,7 +2,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: @v_test_imax_sge_i32 ; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32 ; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@ -17,6 +17,24 @@ define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void ret void
} }
; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid
%a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4
%b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4
%cmp = icmp sge <4 x i32> %a, %b
%val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4
ret void
}
; FUNC-LABEL: @s_test_imax_sge_i32 ; FUNC-LABEL: @s_test_imax_sge_i32
; SI: s_max_i32 ; SI: s_max_i32
define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@ -44,6 +62,15 @@ define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
%cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: @v_test_imax_sgt_i32 ; FUNC-LABEL: @v_test_imax_sgt_i32
; SI: v_max_i32_e32 ; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@ -92,6 +119,19 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
; SI: s_max_u32
; SI: s_max_u32
; SI: s_max_u32
; SI-NOT: s_max_u32
; SI: s_endpgm
define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
%cmp = icmp uge <3 x i32> %a, %b
%val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: @v_test_umax_ugt_i32 ; FUNC-LABEL: @v_test_umax_ugt_i32
; SI: v_max_u32_e32 ; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@ -107,7 +147,7 @@ define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void ret void
} }
; FUNC-LABEL: @s_test_umax_ugt_i32 ; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32 ; SI: s_max_u32
define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ugt i32 %a, %b %cmp = icmp ugt i32 %a, %b
@ -116,13 +156,23 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
%cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
ret void
}
; Make sure the redundant 'and' is removed. ; Make sure the redundant 'and' is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16: ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]] ; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] ; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI-NEXT: buffer_store_dword [[VMIN]] ; SI-NEXT: buffer_store_dword [[VMAX]]
define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind { define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
%a.ext = zext i16 %a to i32 %a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32 %b.ext = zext i16 %b to i32
@ -135,13 +185,13 @@ define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i1
; Make sure redundant sign_extend_inreg removed. ; Make sure redundant sign_extend_inreg removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16: ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]] ; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] ; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI-NEXT: buffer_store_dword [[VMIN]] ; SI-NEXT: buffer_store_dword [[VMAX]]
define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
%a.ext = sext i16 %a to i32 %a.ext = sext i16 %a to i32
%b.ext = sext i16 %b to i32 %b.ext = sext i16 %b to i32
%cmp = icmp sgt i32 %a.ext, %b.ext %cmp = icmp sgt i32 %a.ext, %b.ext
@ -152,15 +202,9 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16
ret void ret void
} }
; FIXME: Should get match min/max through extends inserted by ; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
; legalization. ; SI: s_max_i32
define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
; FUNC-LABEL: {{^}}s_test_imin_sge_i16:
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: v_cmp_ge_i32_e32
; SI: v_cndmask_b32
define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
%cmp = icmp sge i16 %a, %b %cmp = icmp sge i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b %val = select i1 %cmp, i16 %a, i16 %b
store i16 %val, i16 addrspace(1)* %out store i16 %val, i16 addrspace(1)* %out

View File

@ -2,7 +2,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: @v_test_imin_sle_i32 ; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; SI: v_min_i32_e32 ; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@ -17,7 +17,7 @@ define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void ret void
} }
; FUNC-LABEL: @s_test_imin_sle_i32 ; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; SI: s_min_i32 ; SI: s_min_i32
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sle i32 %a, %b %cmp = icmp sle i32 %a, %b
@ -26,6 +26,63 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; SI: s_min_i32
define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%cmp = icmp sle <1 x i32> %a, %b
%val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
store <1 x i32> %val, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%cmp = icmp sle <4 x i32> %a, %b
%val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %val, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; SI: s_min_i32
define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
%cmp = icmp sle i8 %a, %b
%val = select i1 %cmp, i8 %a, i8 %b
store i8 %val, i8 addrspace(1)* %out
ret void
}
; XXX - should be able to use s_min if we stop unnecessarily doing
; extloads with mubuf instructions.
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind {
%cmp = icmp sle <4 x i8> %a, %b
%val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %val, <4 x i8> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%cmp = icmp sle <4 x i16> %a, %b
%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
store <4 x i16> %val, <4 x i16> addrspace(1)* %out
ret void
}
; FUNC-LABEL: @v_test_imin_slt_i32 ; FUNC-LABEL: @v_test_imin_slt_i32
; SI: v_min_i32_e32 ; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@ -50,6 +107,16 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; SI: s_min_i32
; SI: s_min_i32
define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%cmp = icmp slt <2 x i32> %a, %b
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
store <2 x i32> %val, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32: ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 ; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
@ -83,6 +150,24 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void ret void
} }
; FUNC-LABEL: {{^}}v_test_umin_ule_v3i32:
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32
; SI: s_endpgm
define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
%a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0
%b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1
%cmp = icmp ule <3 x i32> %a, %b
%val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep
ret void
}
; FUNC-LABEL: @s_test_umin_ule_i32 ; FUNC-LABEL: @s_test_umin_ule_i32
; SI: s_min_u32 ; SI: s_min_u32
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@ -137,6 +222,48 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace
ret void ret void
} }
; FUNC-LABEL: {{^}}s_test_umin_ult_v1i32:
; SI: s_min_u32
define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%cmp = icmp ult <1 x i32> %a, %b
%val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
store <1 x i32> %val, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%cmp = icmp ult <8 x i32> %a, %b
%val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %val, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%cmp = icmp ult <8 x i16> %a, %b
%val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
store <8 x i16> %val, <8 x i16> addrspace(1)* %out
ret void
}
; Make sure the redundant 'and' is removed. ; Make sure the redundant 'and' is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16: ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
@ -173,14 +300,8 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16
ret void ret void
} }
; FIXME: Should get match min/max through extends inserted by
; legalization.
; FUNC-LABEL: {{^}}s_test_imin_sle_i16: ; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; SI: s_sext_i32_i16 ; SI: s_min_i32
; SI: s_sext_i32_i16
; SI: v_cmp_le_i32_e32
; SI: v_cndmask_b32
define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
%cmp = icmp sle i16 %a, %b %cmp = icmp sle i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b %val = select i1 %cmp, i16 %a, i16 %b

View File

@ -28,17 +28,10 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
} }
; FUNC-LABEL: {{^}}s_abs_v2i32: ; FUNC-LABEL: {{^}}s_abs_v2i32:
; TODO: this should use s_abs_i32 ; GCN: s_abs_i32
; GCNX: s_abs_i32 ; GCN: s_abs_i32
; GCNX: s_abs_i32 ; GCN: s_add_i32
; GCN: s_sub ; GCN: s_add_i32
; GCN: s_sub
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN: v_add_i32
; GCN: v_add_i32
define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind { define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
%z0 = insertelement <2 x i32> undef, i32 0, i32 0 %z0 = insertelement <2 x i32> undef, i32 0, i32 0
%z1 = insertelement <2 x i32> %z0, i32 0, i32 1 %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
@ -55,13 +48,10 @@ define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind
; FUNC-LABEL: {{^}}v_abs_v2i32: ; FUNC-LABEL: {{^}}v_abs_v2i32:
; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] ; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] ; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; TODO: this should use v_max_i32
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN: v_add_i32 ; GCN: v_add_i32
; GCN: v_add_i32 ; GCN: v_add_i32
define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind { define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
@ -80,24 +70,15 @@ define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %
; FUNC-LABEL: {{^}}s_abs_v4i32: ; FUNC-LABEL: {{^}}s_abs_v4i32:
; TODO: this should use s_abs_i32 ; s_abs_i32 is now selected for each element.
; GCNX: s_abs_i32 ; GCN: s_abs_i32
; GCNX: s_abs_i32 ; GCN: s_abs_i32
; GCNX: s_abs_i32 ; GCN: s_abs_i32
; GCNX: s_abs_i32 ; GCN: s_abs_i32
; GCN: s_sub
; GCN: s_sub ; GCN: s_add_i32
; GCN: s_sub ; GCN: s_add_i32
; GCN: s_sub ; GCN: s_add_i32
; GCN-DAG: v_cmp_gt ; GCN: s_add_i32
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN: v_add_i32
; GCN: v_add_i32
define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind { define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
%z0 = insertelement <4 x i32> undef, i32 0, i32 0 %z0 = insertelement <4 x i32> undef, i32 0, i32 0
%z1 = insertelement <4 x i32> %z0, i32 0, i32 1 %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
@ -120,19 +101,12 @@ define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] ; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] ; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] ; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
; TODO: this should use v_max_i32
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
; GCNX: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cmp_gt
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN-DAG: v_cndmask_b32
; GCN: v_add_i32 ; GCN: v_add_i32
; GCN: v_add_i32 ; GCN: v_add_i32
; GCN: v_add_i32 ; GCN: v_add_i32