diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 48e6c31be6b1..de3df34ec446 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1326,6 +1326,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::CTTZ, VT, Custom);
     }
 
+    // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
+    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
+      setOperationAction(ISD::SMAX, VT, Legal);
+      setOperationAction(ISD::UMAX, VT, Legal);
+      setOperationAction(ISD::SMIN, VT, Legal);
+      setOperationAction(ISD::UMIN, VT, Legal);
+    }
+
     // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
     for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
                     MVT::v4i64, MVT::v8i64}) {
@@ -1532,13 +1540,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
-
-    for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
-      setOperationAction(ISD::SMAX, VT, Legal);
-      setOperationAction(ISD::UMAX, VT, Legal);
-      setOperationAction(ISD::SMIN, VT, Legal);
-      setOperationAction(ISD::UMIN, VT, Legal);
-    }
   }
 
   // We want to custom lower some of our intrinsics.
@@ -17696,7 +17697,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
   // Special case: Use min/max operations for SETULE/SETUGE
   MVT VET = VT.getVectorElementType();
   bool HasMinMax =
-      (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
+      (Subtarget.hasAVX512() && VET == MVT::i64) ||
+      (Subtarget.hasSSE41() && (VET == MVT::i16 || VET == MVT::i32)) ||
       (Subtarget.hasSSE2() && (VET == MVT::i8));
   bool MinMax = false;
   if (HasMinMax) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b0f1d179d221..7a4ba0eae733 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4528,6 +4528,46 @@ let Predicates = [HasDQI, NoVLX] in {
             sub_xmm)>;
 }
 
+// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
+let Predicates = [HasDQI, NoVLX] in {
+  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
+            (EXTRACT_SUBREG
+                (VPMULLQZrr
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+             sub_ymm)>;
+
+  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+            (EXTRACT_SUBREG
+                (VPMULLQZrr
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
+             sub_xmm)>;
+}
+
+multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
+  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
+            (EXTRACT_SUBREG
+                (Instr
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+             sub_ymm)>;
+
+  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
+            (EXTRACT_SUBREG
+                (Instr
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+                   (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
+             sub_xmm)>;
+}
+
+let Predicates = [HasAVX512] in {
+  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
+  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
+  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
+  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX-512 Logical Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index cad7991c4f3b..57e3849a73cc 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -270,20 +270,39 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE42-NEXT: pxor %xmm2, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: ge_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: ge_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ge_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: ge_v2i64:
 ; XOP: # BB#0:
 ; XOP-NEXT: vpcomgeuq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT: retq
+;
+; AVX512-LABEL: ge_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp uge <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
@@ -587,20 +606,39 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE42-NEXT: pxor %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: le_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: le_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: le_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: le_v2i64:
 ; XOP: # BB#0:
 ; XOP-NEXT: vpcomleuq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT: retq
+;
+; AVX512-LABEL: le_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp ule <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
diff --git a/llvm/test/CodeGen/X86/vec_minmax_sint.ll b/llvm/test/CodeGen/X86/vec_minmax_sint.ll
index 5999116deb9c..70d60b0075c1 100644
--- a/llvm/test/CodeGen/X86/vec_minmax_sint.ll
+++ b/llvm/test/CodeGen/X86/vec_minmax_sint.ll
@@ -58,11 +58,26 @@ define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: max_gt_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: max_gt_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp sgt <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -168,8 +183,10 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: max_gt_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp sgt <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -444,13 +461,30 @@ define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: max_ge_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: max_ge_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp sge <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -574,10 +608,10 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: max_ge_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp sge <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -857,11 +891,26 @@ define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: min_lt_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: min_lt_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp slt <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -968,8 +1017,10 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: min_lt_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp slt <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -1237,13 +1288,30 @@ define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: min_le_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: min_le_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp sle <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -1366,10 +1434,10 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: min_le_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp sle <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
diff --git a/llvm/test/CodeGen/X86/vec_minmax_uint.ll b/llvm/test/CodeGen/X86/vec_minmax_uint.ll
index ec5f83ea396c..9782384ebe11 100644
--- a/llvm/test/CodeGen/X86/vec_minmax_uint.ll
+++ b/llvm/test/CodeGen/X86/vec_minmax_uint.ll
@@ -62,14 +62,32 @@ define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: max_gt_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: max_gt_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp ugt <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -190,11 +208,10 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: max_gt_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
-; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp ugt <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -485,16 +502,36 @@ define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: max_ge_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: max_ge_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp uge <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -632,13 +669,10 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: max_ge_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
-; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp uge <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -926,14 +960,32 @@ define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: min_lt_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: min_lt_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp ult <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -1054,11 +1106,10 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: min_lt_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
-; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp ult <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
@@ -1348,16 +1399,36 @@ define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE42-NEXT: movapd %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: min_le_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: min_le_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
   %1 = icmp ule <2 x i64> %a, %b
   %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %2
@@ -1495,13 +1566,10 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: min_le_v4i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
-; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
 ; AVX512-NEXT: retq
   %1 = icmp ule <4 x i64> %a, %b
   %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
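---
For reference, a minimal standalone reproducer of the new lowering (a sketch, not part of the patch; the RUN line and feature string are illustrative assumptions — the tests above rely on their files' existing RUN lines and AVX1/AVX2/AVX512 prefixes):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s

; With AVX512F but without AVX512VL, an unsigned <2 x i64> min is now selected
; as a single 512-bit vpminuq on zmm-widened inputs rather than the
; vpxor/vpcmpgtq/vblendvpd sequence.
; CHECK-LABEL: umin_v2i64:
; CHECK: vpminuq %zmm{{[0-9]+}}, %zmm{{[0-9]+}}, %zmm{{[0-9]+}}
define <2 x i64> @umin_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %c = icmp ult <2 x i64> %a, %b
  %r = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %r
}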