forked from OSchip/llvm-project
[X86] Allow VPMAXUQ/VPMAXSQ/VPMINUQ/VPMINSQ to be used with 128/256 bit vectors when AVX512 is enabled.
These instructions can be used by widening the operands to 512 bits and extracting the result back to 128/256 bits. We already do the same for several other instructions. llvm-svn: 319641
This commit is contained in:
parent
1151facf76
commit
4520d4f8ad
|
@ -1326,6 +1326,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||||
setOperationAction(ISD::CTTZ, VT, Custom);
|
setOperationAction(ISD::CTTZ, VT, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
||||||
|
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
|
||||||
|
setOperationAction(ISD::SMAX, VT, Legal);
|
||||||
|
setOperationAction(ISD::UMAX, VT, Legal);
|
||||||
|
setOperationAction(ISD::SMIN, VT, Legal);
|
||||||
|
setOperationAction(ISD::UMIN, VT, Legal);
|
||||||
|
}
|
||||||
|
|
||||||
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
||||||
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
|
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
|
||||||
MVT::v8i64}) {
|
MVT::v8i64}) {
|
||||||
|
@ -1532,13 +1540,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||||
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
|
||||||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
|
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
|
||||||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
|
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
|
||||||
|
|
||||||
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
|
|
||||||
setOperationAction(ISD::SMAX, VT, Legal);
|
|
||||||
setOperationAction(ISD::UMAX, VT, Legal);
|
|
||||||
setOperationAction(ISD::SMIN, VT, Legal);
|
|
||||||
setOperationAction(ISD::UMIN, VT, Legal);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We want to custom lower some of our intrinsics.
|
// We want to custom lower some of our intrinsics.
|
||||||
|
@ -17696,7 +17697,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
||||||
// Special case: Use min/max operations for SETULE/SETUGE
|
// Special case: Use min/max operations for SETULE/SETUGE
|
||||||
MVT VET = VT.getVectorElementType();
|
MVT VET = VT.getVectorElementType();
|
||||||
bool HasMinMax =
|
bool HasMinMax =
|
||||||
(Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
|
(Subtarget.hasAVX512() && VET == MVT::i64) ||
|
||||||
|
(Subtarget.hasSSE41() && (VET == MVT::i16 || VET == MVT::i32)) ||
|
||||||
(Subtarget.hasSSE2() && (VET == MVT::i8));
|
(Subtarget.hasSSE2() && (VET == MVT::i8));
|
||||||
bool MinMax = false;
|
bool MinMax = false;
|
||||||
if (HasMinMax) {
|
if (HasMinMax) {
|
||||||
|
|
|
@ -4528,6 +4528,46 @@ let Predicates = [HasDQI, NoVLX] in {
|
||||||
sub_xmm)>;
|
sub_xmm)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
|
||||||
|
let Predicates = [HasDQI, NoVLX] in {
|
||||||
|
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
|
||||||
|
(EXTRACT_SUBREG
|
||||||
|
(VPMULLQZrr
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
|
||||||
|
sub_ymm)>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
|
||||||
|
(EXTRACT_SUBREG
|
||||||
|
(VPMULLQZrr
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
|
||||||
|
sub_xmm)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
|
||||||
|
def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
|
||||||
|
(EXTRACT_SUBREG
|
||||||
|
(Instr
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
|
||||||
|
sub_ymm)>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
|
||||||
|
(EXTRACT_SUBREG
|
||||||
|
(Instr
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
|
||||||
|
sub_xmm)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX512] in {
|
||||||
|
defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
|
||||||
|
defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
|
||||||
|
defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
|
||||||
|
defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 Logical Instructions
|
// AVX-512 Logical Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -270,20 +270,39 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
|
||||||
; SSE42-NEXT: pxor %xmm2, %xmm0
|
; SSE42-NEXT: pxor %xmm2, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: ge_v2i64:
|
; AVX1-LABEL: ge_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: ge_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; XOP-LABEL: ge_v2i64:
|
; XOP-LABEL: ge_v2i64:
|
||||||
; XOP: # BB#0:
|
; XOP: # BB#0:
|
||||||
; XOP-NEXT: vpcomgeuq %xmm1, %xmm0, %xmm0
|
; XOP-NEXT: vpcomgeuq %xmm1, %xmm0, %xmm0
|
||||||
; XOP-NEXT: retq
|
; XOP-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: ge_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1
|
||||||
|
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp uge <2 x i64> %a, %b
|
%1 = icmp uge <2 x i64> %a, %b
|
||||||
%2 = sext <2 x i1> %1 to <2 x i64>
|
%2 = sext <2 x i1> %1 to <2 x i64>
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -587,20 +606,39 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
|
||||||
; SSE42-NEXT: pxor %xmm1, %xmm0
|
; SSE42-NEXT: pxor %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: le_v2i64:
|
; AVX1-LABEL: le_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: le_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; XOP-LABEL: le_v2i64:
|
; XOP-LABEL: le_v2i64:
|
||||||
; XOP: # BB#0:
|
; XOP: # BB#0:
|
||||||
; XOP-NEXT: vpcomleuq %xmm1, %xmm0, %xmm0
|
; XOP-NEXT: vpcomleuq %xmm1, %xmm0, %xmm0
|
||||||
; XOP-NEXT: retq
|
; XOP-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: le_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm1
|
||||||
|
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ule <2 x i64> %a, %b
|
%1 = icmp ule <2 x i64> %a, %b
|
||||||
%2 = sext <2 x i1> %1 to <2 x i64>
|
%2 = sext <2 x i1> %1 to <2 x i64>
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
|
|
@ -58,11 +58,26 @@ define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: max_gt_v2i64:
|
; AVX1-LABEL: max_gt_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: max_gt_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: max_gt_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sgt <2 x i64> %a, %b
|
%1 = icmp sgt <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -168,8 +183,10 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: max_gt_v4i64:
|
; AVX512-LABEL: max_gt_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sgt <4 x i64> %a, %b
|
%1 = icmp sgt <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -444,13 +461,30 @@ define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: max_ge_v2i64:
|
; AVX1-LABEL: max_ge_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||||
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: max_ge_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: max_ge_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sge <2 x i64> %a, %b
|
%1 = icmp sge <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -574,10 +608,10 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: max_ge_v4i64:
|
; AVX512-LABEL: max_ge_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sge <4 x i64> %a, %b
|
%1 = icmp sge <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -857,11 +891,26 @@ define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: min_lt_v2i64:
|
; AVX1-LABEL: min_lt_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: min_lt_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: min_lt_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp slt <2 x i64> %a, %b
|
%1 = icmp slt <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -968,8 +1017,10 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: min_lt_v4i64:
|
; AVX512-LABEL: min_lt_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp slt <4 x i64> %a, %b
|
%1 = icmp slt <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -1237,13 +1288,30 @@ define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: min_le_v2i64:
|
; AVX1-LABEL: min_le_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||||
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: min_le_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: min_le_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sle <2 x i64> %a, %b
|
%1 = icmp sle <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -1366,10 +1434,10 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: min_le_v4i64:
|
; AVX512-LABEL: min_le_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp sle <4 x i64> %a, %b
|
%1 = icmp sle <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
|
|
@ -62,14 +62,32 @@ define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: max_gt_v2i64:
|
; AVX1-LABEL: max_gt_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
||||||
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: max_gt_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: max_gt_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ugt <2 x i64> %a, %b
|
%1 = icmp ugt <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -190,11 +208,10 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: max_gt_v4i64:
|
; AVX512-LABEL: max_gt_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
|
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ugt <4 x i64> %a, %b
|
%1 = icmp ugt <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -485,16 +502,36 @@ define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: max_ge_v2i64:
|
; AVX1-LABEL: max_ge_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||||
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: max_ge_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: max_ge_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp uge <2 x i64> %a, %b
|
%1 = icmp uge <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -632,13 +669,10 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: max_ge_v4i64:
|
; AVX512-LABEL: max_ge_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
|
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
|
|
||||||
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp uge <4 x i64> %a, %b
|
%1 = icmp uge <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -926,14 +960,32 @@ define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: min_lt_v2i64:
|
; AVX1-LABEL: min_lt_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||||
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: min_lt_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: min_lt_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ult <2 x i64> %a, %b
|
%1 = icmp ult <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -1054,11 +1106,10 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: min_lt_v4i64:
|
; AVX512-LABEL: min_lt_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
|
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ult <4 x i64> %a, %b
|
%1 = icmp ult <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
@ -1348,16 +1399,36 @@ define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE42-NEXT: retq
|
; SSE42-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: min_le_v2i64:
|
; AVX1-LABEL: min_le_v2i64:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
||||||
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: min_le_v2i64:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||||
|
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: min_le_v2i64:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||||
|
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||||
|
; AVX512-NEXT: vzeroupper
|
||||||
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ule <2 x i64> %a, %b
|
%1 = icmp ule <2 x i64> %a, %b
|
||||||
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
%2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
|
||||||
ret <2 x i64> %2
|
ret <2 x i64> %2
|
||||||
|
@ -1495,13 +1566,10 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: min_le_v4i64:
|
; AVX512-LABEL: min_le_v4i64:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
|
; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||||
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
|
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
|
; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||||
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
|
|
||||||
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
|
||||||
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%1 = icmp ule <4 x i64> %a, %b
|
%1 = icmp ule <4 x i64> %a, %b
|
||||||
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
%2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
|
||||||
|
|
Loading…
Reference in New Issue