[X86] Add DAG combine to merge vzext_movl with the various fp<->int conversion operations that only write the lower 64-bits of an xmm register and zero the rest.
Summary:
We have isel patterns for this, but we're missing some load patterns and all
broadcast patterns. A DAG combine seems like a better fit for this.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D56971

llvm-svn: 352260
parent 8068bc9071
commit 7a8e74775c
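For illustration only, here is a hypothetical C++ reproducer (not taken from the commit; the function name cvt_then_zero_upper and the choice of SSE2 intrinsics are my own) of the kind of DAG this combine targets: a conversion that already zeroes the upper 64 bits of the XMM register, followed by an explicit zeroing of the upper 64 bits.

#include <emmintrin.h>

// cvtpd2ps writes the two converted floats into the low 64 bits and zeroes
// the upper 64 bits, so the following _mm_move_epi64 (which lowers to the
// vzext_movl node this combine looks for) adds no information and can fold away.
__m128 cvt_then_zero_upper(__m128d x) {
  __m128 lo = _mm_cvtpd_ps(x);          // vcvtpd2ps: upper 64 bits already zero
  __m128i bits = _mm_castps_si128(lo);  // bitcast only, no instruction emitted
  bits = _mm_move_epi64(bits);          // vzext_movl: redundant zeroing of the upper half
  return _mm_castsi128_ps(bits);
}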
@@ -32499,6 +32499,32 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     return SDValue(N, 0);
   }
 
+  // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
+  // in the upper 64 bits.
+  // TODO: Can we generalize this using computeKnownBits.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64) &&
+      N->getOperand(0).getOpcode() == ISD::BITCAST &&
+      (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
+       N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
+    SDValue In = N->getOperand(0).getOperand(0);
+    switch (In.getOpcode()) {
+    default:
+      break;
+    case X86ISD::CVTP2SI: case X86ISD::CVTP2UI:
+    case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI:
+    case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI:
+    case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
+    case X86ISD::CVTSI2P: case X86ISD::CVTUI2P:
+    case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P:
+    case X86ISD::VFPROUND: case X86ISD::VMFPROUND:
+      if (In.getOperand(0).getValueType() == MVT::v2f64 ||
+          In.getOperand(0).getValueType() == MVT::v2i64)
+        return N->getOperand(0); // return the bitcast
+      break;
+    }
+  }
+
   // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
   // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
   // FIXME: This can probably go away once we default to widening legalization.
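As a side note on the TODO in the hunk above: a minimal sketch of the computeKnownBits generalization it hints at might look like the helper below. This is an assumption about a possible follow-up, not code from this commit; the helper name upperHalfKnownZero is hypothetical, and the approach only applies to the integer (v4i32) bitcast source, since known-bits analysis does not track floating-point values.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Hypothetical helper: returns true if the two upper 32-bit elements of a
// v4i32 value are known to be zero, i.e. the upper 64 bits of the XMM
// register are already cleared and a following vzext_movl is redundant.
static bool upperHalfKnownZero(SDValue BCSrc, SelectionDAG &DAG) {
  if (BCSrc.getValueType() != MVT::v4i32)
    return false;                       // FP sources are not tracked by known bits
  // Demand only elements 2 and 3 (bits 64..127 of the 128-bit register).
  APInt DemandedElts = APInt::getHighBitsSet(4, 2);
  KnownBits Known = DAG.computeKnownBits(BCSrc, DemandedElts);
  return Known.isZero();                // all demanded bits known to be zero
}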
@@ -8184,12 +8184,6 @@ let Predicates = [HasVLX] in {
                    v4f32x_info.ImmAllZerosV),
             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
-            (VCVTPD2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSZ128rm addr:$src)>;
   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
             (VCVTPS2PDZ128rm addr:$src)>;
   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -8831,25 +8825,6 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
 }
 
 let Predicates = [HasAVX512, HasVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-            (VCVTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
-            (VCVTPD2UDQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-            (VCVTTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
-            (VCVTTPD2UDQZ128rr VR128X:$src)>;
-
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
@@ -8862,13 +8837,6 @@ let Predicates = [HasAVX512, HasVLX] in {
 }
 
 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
-            (VCVTQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
-            (VCVTUQQ2PSZ128rr VR128X:$src)>;
-
   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
@@ -1617,21 +1617,6 @@ let Predicates = [HasAVX, NoVLX] in {
             (VCVTTPD2DQYrm addr:$src)>;
 }
 
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (VCVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (VCVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQrm addr:$src)>;
-} // Predicates = [HasAVX, NoVLX]
-
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -1643,21 +1628,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                       (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
                       Sched<[WriteCvtPD2ILd]>;
 
-let Predicates = [UseSSE2] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (CVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
-            (CVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (CVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
-            (CVTTPD2DQrm addr:$src)>;
-} // Predicates = [UseSSE2]
-
 // Convert packed single to packed double
 let Predicates = [HasAVX, NoVLX] in {
 // SSE2 instructions without OpSize prefix
@@ -1790,33 +1760,11 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
                        Sched<[WriteCvtPD2PS.Folded]>;
 
-// AVX 256-bit register conversion intrinsics
-// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
-// whenever possible to avoid declaring two versions of each one.
-
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
             (VCVTPD2PSYrr VR256:$src)>;
   def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
-
-  // Match fpround and fpextend for 128/256-bit conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (VCVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Match fpround and fpextend for 128 conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (CVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
-            (CVTPD2PSrm addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -223,8 +223,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -233,8 +231,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -445,8 +441,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -455,8 +449,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3327,8 +3327,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3337,8 +3335,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3381,8 +3377,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_zext(<2 x double> %x0, <4
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3391,8 +3385,6 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_zext(<2 x double> %x0, <4
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3435,8 +3427,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0,
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3445,8 +3435,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0,
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3616,8 +3604,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128_zext(<2 x double> %x0,
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3626,8 +3612,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128_zext(<2 x double> %x0,
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3670,8 +3654,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128_zext(<2 x double> %x0,
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3680,8 +3662,6 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128_zext(<2 x double> %x0,
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]