forked from OSchip/llvm-project
[X86] Call lowerShuffleAsBitMask for 512-bit vectors in lowerShuffleAsBlend.
This patch enables the use of lowerShuffleAsBitMask for 512-bit blends before falling back to a move immediate, a GPR to k-register move, and a masked op. I had to make some changes to support v8i64 when i64 is not a legal type, and to support floating point types. This trades a load for the move immediate and GPR move which is higher latency. But it's probably better for register pressure not having to hop through other register classes. The load+and should play better with LICM and rematerialization, I think. Differential Revision: https://reviews.llvm.org/D59479 llvm-svn: 356618
This commit is contained in:
parent
bbcb95a64e
commit
0367553304
|
@ -10364,11 +10364,30 @@ static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
|
|||
/// one of the inputs being zeroable.
|
||||
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> Mask,
|
||||
const APInt &Zeroable, SelectionDAG &DAG) {
|
||||
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT MaskVT = VT;
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
SDValue Zero = DAG.getConstant(0, DL, EltVT);
|
||||
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
|
||||
SDValue Zero, AllOnes;
|
||||
// Use f64 if i64 isn't legal.
|
||||
if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
|
||||
EltVT = MVT::f64;
|
||||
MaskVT = MVT::getVectorVT(EltVT, Mask.size());
|
||||
}
|
||||
|
||||
MVT LogicVT = VT;
|
||||
if (EltVT == MVT::f32 || EltVT == MVT::f64) {
|
||||
Zero = DAG.getConstantFP(0.0, DL, MVT::f64);
|
||||
AllOnes = DAG.getConstantFP(APInt::getAllOnesValue(64).bitsToDouble(), DL,
|
||||
EltVT);
|
||||
LogicVT = MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32,
|
||||
Mask.size());
|
||||
} else {
|
||||
Zero = DAG.getConstant(0, DL, EltVT);
|
||||
AllOnes = DAG.getAllOnesConstant(DL, EltVT);
|
||||
}
|
||||
|
||||
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
|
||||
SDValue V;
|
||||
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
|
||||
|
@ -10386,8 +10405,11 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
if (!V)
|
||||
return SDValue(); // No non-zeroable elements!
|
||||
|
||||
SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
|
||||
return DAG.getNode(ISD::AND, DL, VT, V, VMask);
|
||||
SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
|
||||
VMask = DAG.getBitcast(LogicVT, VMask);
|
||||
V = DAG.getBitcast(LogicVT, V);
|
||||
SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
|
||||
return DAG.getBitcast(VT, And);
|
||||
}
|
||||
|
||||
/// Try to emit a blend instruction for a shuffle using bit math.
|
||||
|
@ -10552,7 +10574,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
|
||||
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
|
||||
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
|
||||
DAG))
|
||||
Subtarget, DAG))
|
||||
return Masked;
|
||||
|
||||
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
|
||||
|
@ -10610,6 +10632,16 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
case MVT::v16i32:
|
||||
case MVT::v32i16:
|
||||
case MVT::v64i8: {
|
||||
// Attempt to lower to a bitmask if we can. Only if not optimizing for size.
|
||||
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
|
||||
if (!OptForSize) {
|
||||
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
|
||||
Subtarget, DAG))
|
||||
return Masked;
|
||||
}
|
||||
|
||||
// Otherwise load an immediate into a GPR, cast to k-register, and use a
|
||||
// masked move.
|
||||
MVT IntegerType =
|
||||
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
|
||||
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
|
||||
|
@ -12766,7 +12798,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return Blend;
|
||||
|
||||
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
|
||||
Zeroable, DAG))
|
||||
Zeroable, Subtarget, DAG))
|
||||
return Masked;
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
|
@ -13467,7 +13499,7 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return Blend;
|
||||
|
||||
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
|
||||
Zeroable, DAG))
|
||||
Zeroable, Subtarget, DAG))
|
||||
return Masked;
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
|
@ -13735,7 +13767,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
}
|
||||
|
||||
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
|
||||
Zeroable, DAG))
|
||||
Zeroable, Subtarget, DAG))
|
||||
return Masked;
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
|
@ -15571,7 +15603,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
|
|||
// No floating point type available, if we can't use the bit operations
|
||||
// for masking/blending then decompose into 128-bit vectors.
|
||||
if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
|
||||
DAG))
|
||||
Subtarget, DAG))
|
||||
return V;
|
||||
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
|
||||
return V;
|
||||
|
|
|
@ -1860,16 +1860,12 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
|
|||
;
|
||||
; SKX-LABEL: test_build_vec_v32i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_build_vec_v32i1:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_build_vec_v32i1:
|
||||
|
@ -1880,6 +1876,41 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
|
|||
;
|
||||
; X86-LABEL: test_build_vec_v32i1:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vandps LCPI40_0, %zmm0, %zmm0
|
||||
; X86-NEXT: retl
|
||||
%ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %ret
|
||||
}
|
||||
|
||||
define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
|
||||
; KNL-LABEL: test_build_vec_v32i1_optsize:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_build_vec_v32i1_optsize:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_build_vec_v32i1_optsize:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
|
@ -1928,12 +1959,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
|||
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB42_2
|
||||
; KNL-NEXT: je LBB43_2
|
||||
; KNL-NEXT: ## %bb.1: ## %L1
|
||||
; KNL-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB42_2: ## %L2
|
||||
; KNL-NEXT: LBB43_2: ## %L2
|
||||
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
|
@ -1945,12 +1976,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
|||
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
||||
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; SKX-NEXT: kortestb %k0, %k0
|
||||
; SKX-NEXT: je LBB42_2
|
||||
; SKX-NEXT: je LBB43_2
|
||||
; SKX-NEXT: ## %bb.1: ## %L1
|
||||
; SKX-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB42_2: ## %L2
|
||||
; SKX-NEXT: LBB43_2: ## %L2
|
||||
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
|
@ -1963,12 +1994,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
|||
; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB42_2
|
||||
; AVX512BW-NEXT: je LBB43_2
|
||||
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB42_2: ## %L2
|
||||
; AVX512BW-NEXT: LBB43_2: ## %L2
|
||||
; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -1980,12 +2011,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
|||
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
||||
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512DQ-NEXT: kortestb %k0, %k0
|
||||
; AVX512DQ-NEXT: je LBB42_2
|
||||
; AVX512DQ-NEXT: je LBB43_2
|
||||
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB42_2: ## %L2
|
||||
; AVX512DQ-NEXT: LBB43_2: ## %L2
|
||||
; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
|
@ -1998,12 +2029,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
|||
; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
|
||||
; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; X86-NEXT: kortestb %k0, %k0
|
||||
; X86-NEXT: je LBB42_2
|
||||
; X86-NEXT: je LBB43_2
|
||||
; X86-NEXT: ## %bb.1: ## %L1
|
||||
; X86-NEXT: vmovapd %zmm0, (%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB42_2: ## %L2
|
||||
; X86-NEXT: LBB43_2: ## %L2
|
||||
; X86-NEXT: vmovapd %zmm0, 8(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
|
@ -2052,13 +2083,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: je LBB43_2
|
||||
; KNL-NEXT: je LBB44_2
|
||||
; KNL-NEXT: ## %bb.1: ## %L1
|
||||
; KNL-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB43_2: ## %L2
|
||||
; KNL-NEXT: LBB44_2: ## %L2
|
||||
; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -2077,13 +2108,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; SKX-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; SKX-NEXT: kortestd %k1, %k0
|
||||
; SKX-NEXT: je LBB43_2
|
||||
; SKX-NEXT: je LBB44_2
|
||||
; SKX-NEXT: ## %bb.1: ## %L1
|
||||
; SKX-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB43_2: ## %L2
|
||||
; SKX-NEXT: LBB44_2: ## %L2
|
||||
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
|
@ -2102,13 +2133,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; AVX512BW-NEXT: kortestd %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB43_2
|
||||
; AVX512BW-NEXT: je LBB44_2
|
||||
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB43_2: ## %L2
|
||||
; AVX512BW-NEXT: LBB44_2: ## %L2
|
||||
; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
|
@ -2130,13 +2161,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: je LBB43_2
|
||||
; AVX512DQ-NEXT: je LBB44_2
|
||||
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB43_2: ## %L2
|
||||
; AVX512DQ-NEXT: LBB44_2: ## %L2
|
||||
; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
|
@ -2156,13 +2187,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
|||
; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; X86-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; X86-NEXT: kortestd %k1, %k0
|
||||
; X86-NEXT: je LBB43_2
|
||||
; X86-NEXT: je LBB44_2
|
||||
; X86-NEXT: ## %bb.1: ## %L1
|
||||
; X86-NEXT: vmovaps %zmm0, (%eax)
|
||||
; X86-NEXT: vmovaps %zmm1, 64(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB43_2: ## %L2
|
||||
; X86-NEXT: LBB44_2: ## %L2
|
||||
; X86-NEXT: vmovaps %zmm0, 4(%eax)
|
||||
; X86-NEXT: vmovaps %zmm1, 68(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
|
@ -3175,12 +3206,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|||
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testw %ax, %ax
|
||||
; KNL-NEXT: jle LBB64_1
|
||||
; KNL-NEXT: jle LBB65_1
|
||||
; KNL-NEXT: ## %bb.2: ## %bb.2
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB64_1: ## %bb.1
|
||||
; KNL-NEXT: LBB65_1: ## %bb.1
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -3194,12 +3225,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|||
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testw %ax, %ax
|
||||
; SKX-NEXT: jle LBB64_1
|
||||
; SKX-NEXT: jle LBB65_1
|
||||
; SKX-NEXT: ## %bb.2: ## %bb.2
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB64_1: ## %bb.1
|
||||
; SKX-NEXT: LBB65_1: ## %bb.1
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -3213,12 +3244,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|||
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testw %ax, %ax
|
||||
; AVX512BW-NEXT: jle LBB64_1
|
||||
; AVX512BW-NEXT: jle LBB65_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB64_1: ## %bb.1
|
||||
; AVX512BW-NEXT: LBB65_1: ## %bb.1
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -3232,12 +3263,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|||
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: testw %ax, %ax
|
||||
; AVX512DQ-NEXT: jle LBB64_1
|
||||
; AVX512DQ-NEXT: jle LBB65_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB64_1: ## %bb.1
|
||||
; AVX512DQ-NEXT: LBB65_1: ## %bb.1
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -3251,12 +3282,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|||
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; X86-NEXT: kmovd %k0, %eax
|
||||
; X86-NEXT: testw %ax, %ax
|
||||
; X86-NEXT: jle LBB64_1
|
||||
; X86-NEXT: jle LBB65_1
|
||||
; X86-NEXT: ## %bb.2: ## %bb.2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB64_1: ## %bb.1
|
||||
; X86-NEXT: LBB65_1: ## %bb.1
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
@ -3284,11 +3315,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
|
|||
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; CHECK-NEXT: kortestw %k0, %k0
|
||||
; CHECK-NEXT: jb LBB65_2
|
||||
; CHECK-NEXT: jb LBB66_2
|
||||
; CHECK-NEXT: ## %bb.1: ## %bb.1
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: callq _foo
|
||||
; CHECK-NEXT: LBB65_2: ## %bb.2
|
||||
; CHECK-NEXT: LBB66_2: ## %bb.2
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -3300,11 +3331,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
|
|||
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; X86-NEXT: kortestw %k0, %k0
|
||||
; X86-NEXT: jb LBB65_2
|
||||
; X86-NEXT: jb LBB66_2
|
||||
; X86-NEXT: ## %bb.1: ## %bb.1
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: LBB65_2: ## %bb.2
|
||||
; X86-NEXT: LBB66_2: ## %bb.2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
|
@ -3492,12 +3523,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
|||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB71_1
|
||||
; KNL-NEXT: je LBB72_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB71_1: ## %bar
|
||||
; KNL-NEXT: LBB72_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -3514,12 +3545,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
|||
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
|
||||
; SKX-NEXT: korb %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestb %k1, %k0
|
||||
; SKX-NEXT: je LBB71_1
|
||||
; SKX-NEXT: je LBB72_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB71_1: ## %bar
|
||||
; SKX-NEXT: LBB72_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -3542,12 +3573,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
|||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB71_1
|
||||
; AVX512BW-NEXT: je LBB72_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB71_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB72_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -3568,12 +3599,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
|||
; AVX512DQ-NEXT: korb %k1, %k0, %k0
|
||||
; AVX512DQ-NEXT: korb %k3, %k2, %k1
|
||||
; AVX512DQ-NEXT: ktestb %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB71_1
|
||||
; AVX512DQ-NEXT: je LBB72_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB71_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB72_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -3590,12 +3621,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
|||
; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
|
||||
; X86-NEXT: korb %k2, %k1, %k1
|
||||
; X86-NEXT: ktestb %k1, %k0
|
||||
; X86-NEXT: je LBB71_1
|
||||
; X86-NEXT: je LBB72_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB71_1: ## %bar
|
||||
; X86-NEXT: LBB72_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
@ -3633,12 +3664,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
|||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB72_1
|
||||
; KNL-NEXT: je LBB73_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB72_1: ## %bar
|
||||
; KNL-NEXT: LBB73_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -3655,12 +3686,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
|||
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korb %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestb %k1, %k0
|
||||
; SKX-NEXT: je LBB72_1
|
||||
; SKX-NEXT: je LBB73_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB72_1: ## %bar
|
||||
; SKX-NEXT: LBB73_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -3679,12 +3710,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
|||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB72_1
|
||||
; AVX512BW-NEXT: je LBB73_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB72_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB73_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -3701,12 +3732,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
|||
; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; AVX512DQ-NEXT: korb %k2, %k1, %k1
|
||||
; AVX512DQ-NEXT: ktestb %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB72_1
|
||||
; AVX512DQ-NEXT: je LBB73_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB72_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB73_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -3723,12 +3754,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
|||
; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: korb %k2, %k1, %k1
|
||||
; X86-NEXT: ktestb %k1, %k0
|
||||
; X86-NEXT: je LBB72_1
|
||||
; X86-NEXT: je LBB73_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB72_1: ## %bar
|
||||
; X86-NEXT: LBB73_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
@ -3765,12 +3796,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
|||
; KNL-NEXT: korw %k2, %k1, %k1
|
||||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: je LBB73_1
|
||||
; KNL-NEXT: je LBB74_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB73_1: ## %bar
|
||||
; KNL-NEXT: LBB74_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -3787,12 +3818,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
|||
; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korw %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestw %k1, %k0
|
||||
; SKX-NEXT: je LBB73_1
|
||||
; SKX-NEXT: je LBB74_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB73_1: ## %bar
|
||||
; SKX-NEXT: LBB74_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -3810,12 +3841,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
|||
; AVX512BW-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kortestw %k0, %k0
|
||||
; AVX512BW-NEXT: je LBB73_1
|
||||
; AVX512BW-NEXT: je LBB74_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB73_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB74_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -3832,12 +3863,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
|||
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; AVX512DQ-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512DQ-NEXT: ktestw %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB73_1
|
||||
; AVX512DQ-NEXT: je LBB74_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB73_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB74_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -3854,12 +3885,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
|||
; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: korw %k2, %k1, %k1
|
||||
; X86-NEXT: ktestw %k1, %k0
|
||||
; X86-NEXT: je LBB73_1
|
||||
; X86-NEXT: je LBB74_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB73_1: ## %bar
|
||||
; X86-NEXT: LBB74_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
@ -3911,12 +3942,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
|||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: je LBB74_1
|
||||
; KNL-NEXT: je LBB75_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB74_1: ## %bar
|
||||
; KNL-NEXT: LBB75_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -3933,12 +3964,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
|||
; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: kord %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestd %k1, %k0
|
||||
; SKX-NEXT: je LBB74_1
|
||||
; SKX-NEXT: je LBB75_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB74_1: ## %bar
|
||||
; SKX-NEXT: LBB75_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -3955,12 +3986,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
|||
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; AVX512BW-NEXT: kord %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: ktestd %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB74_1
|
||||
; AVX512BW-NEXT: je LBB75_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB74_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB75_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -3993,12 +4024,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
|||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: je LBB74_1
|
||||
; AVX512DQ-NEXT: je LBB75_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB74_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB75_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -4015,12 +4046,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
|||
; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: kord %k2, %k1, %k1
|
||||
; X86-NEXT: ktestd %k1, %k0
|
||||
; X86-NEXT: je LBB74_1
|
||||
; X86-NEXT: je LBB75_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB74_1: ## %bar
|
||||
; X86-NEXT: LBB75_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
@ -4096,12 +4127,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
|||
; KNL-NEXT: orl %eax, %edx
|
||||
; KNL-NEXT: shlq $32, %rdx
|
||||
; KNL-NEXT: orq %rcx, %rdx
|
||||
; KNL-NEXT: je LBB75_1
|
||||
; KNL-NEXT: je LBB76_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB75_1: ## %bar
|
||||
; KNL-NEXT: LBB76_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
|
@ -4118,12 +4149,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
|||
; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korq %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestq %k1, %k0
|
||||
; SKX-NEXT: je LBB75_1
|
||||
; SKX-NEXT: je LBB76_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB75_1: ## %bar
|
||||
; SKX-NEXT: LBB76_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
|
@ -4140,12 +4171,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
|||
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
|
||||
; AVX512BW-NEXT: korq %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: ktestq %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB75_1
|
||||
; AVX512BW-NEXT: je LBB76_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB75_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB76_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
|
@ -4202,12 +4233,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
|||
; AVX512DQ-NEXT: orl %eax, %edx
|
||||
; AVX512DQ-NEXT: shlq $32, %rdx
|
||||
; AVX512DQ-NEXT: orq %rcx, %rdx
|
||||
; AVX512DQ-NEXT: je LBB75_1
|
||||
; AVX512DQ-NEXT: je LBB76_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB75_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB76_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
|
@ -4226,12 +4257,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
|||
; X86-NEXT: kandq %k1, %k0, %k0
|
||||
; X86-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X86-NEXT: kortestd %k1, %k0
|
||||
; X86-NEXT: je LBB75_1
|
||||
; X86-NEXT: je LBB76_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB75_1: ## %bar
|
||||
; X86-NEXT: LBB76_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
|
|
|
@ -128,29 +128,17 @@ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noin
|
|||
}
|
||||
|
||||
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movb $32, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-NEXT: knotw %k0, %k1
|
||||
; AVX512F-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: movb $32, %al
|
||||
; AVX512BW-NEXT: kmovd %eax, %k0
|
||||
; AVX512BW-NEXT: knotw %k0, %k1
|
||||
; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: retq
|
||||
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
|
||||
; ALL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
|
||||
; X32-AVX512F: # %bb.0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: movb $32, %cl
|
||||
; X32-AVX512F-NEXT: kmovw %ecx, %k0
|
||||
; X32-AVX512F-NEXT: knotw %k0, %k1
|
||||
; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z}
|
||||
; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
|
||||
; X32-AVX512F-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
|
||||
%ptr2 = getelementptr inbounds double, double* %ptr, i64 3
|
||||
|
@ -219,29 +207,17 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
|
|||
}
|
||||
|
||||
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movb $32, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-NEXT: knotw %k0, %k1
|
||||
; AVX512F-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: movb $32, %al
|
||||
; AVX512BW-NEXT: kmovd %eax, %k0
|
||||
; AVX512BW-NEXT: knotw %k0, %k1
|
||||
; AVX512BW-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: retq
|
||||
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
|
||||
; ALL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
|
||||
; X32-AVX512F: # %bb.0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: movb $32, %cl
|
||||
; X32-AVX512F-NEXT: kmovw %ecx, %k0
|
||||
; X32-AVX512F-NEXT: knotw %k0, %k1
|
||||
; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z}
|
||||
; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
|
||||
; X32-AVX512F-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
|
||||
%ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
|
||||
|
@ -450,29 +426,17 @@ define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable
|
|||
}
|
||||
|
||||
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movw $8240, %ax # imm = 0x2030
|
||||
; AVX512F-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-NEXT: knotw %k0, %k1
|
||||
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: movw $8240, %ax # imm = 0x2030
|
||||
; AVX512BW-NEXT: kmovd %eax, %k0
|
||||
; AVX512BW-NEXT: knotw %k0, %k1
|
||||
; AVX512BW-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: retq
|
||||
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; ALL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
|
||||
; X32-AVX512F: # %bb.0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030
|
||||
; X32-AVX512F-NEXT: kmovw %ecx, %k0
|
||||
; X32-AVX512F-NEXT: knotw %k0, %k1
|
||||
; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z}
|
||||
; X32-AVX512F-NEXT: vmovdqu64 (%eax), %zmm0
|
||||
; X32-AVX512F-NEXT: vpandd {{\.LCPI.*}}, %zmm0, %zmm0
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
|
||||
%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
|
||||
|
|
|
@ -203,9 +203,9 @@ define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a
|
|||
;
|
||||
; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: movl $1, %eax
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: movl $65535, %eax ## imm = 0xFFFF
|
||||
; SKX-NEXT: vmovd %eax, %xmm1
|
||||
; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
|
||||
ret <32 x i16> %shuffle
|
||||
|
|
Loading…
Reference in New Issue