[X86] Add DAG combine to replace vXi64 vzext_movl+scalar_to_vector with vYi32 vzext_movl+scalar_to_vector if the upper 32 bits of the scalar are zero.
When the upper 32 bits are known zero, we can use a 32-bit copy-and-zero in the SSE domain instead of a 64-bit GPR-to-XMM movq. Also remove the zextloadi64i32 isel patterns that become dead with this: the combine now rewrites those nodes before they reach instruction selection.
This commit is contained in: d81d451442 (parent d41ea65ee8)
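For reference, the IR shape this combine targets, sketched from the truncated test bodies further down (the trailing insertelement/ret lines are assumed, since the diff cuts each function off after the zext):

define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  ; assumed tail: insert into a zero vector and return
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

The i64 scalar is a zext from i32, so its upper 32 bits are known zero; the v2i64 vzext_movl can therefore be narrowed to v4i32, which lets isel either stay entirely in the SSE domain (the blend/shift sequences in the tests below) or use a 32-bit movd instead of a 64-bit movq.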
@@ -35971,6 +35971,24 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into
+  // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))))
+  // if the upper bits of the i64 are zero.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+      N->getOperand(0)->getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      N->getOperand(0).getOperand(0).hasOneUse() &&
+      N->getOperand(0).getOperand(0).getValueType() == MVT::i64) {
+    SDValue In = N->getOperand(0).getOperand(0);
+    APInt Mask = APInt::getHighBitsSet(64, 32);
+    if (DAG.MaskedValueIsZero(In, Mask)) {
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, In);
+      MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+      SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Trunc);
+      SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, VecVT, SclVec);
+      return DAG.getBitcast(VT, Movl);
+    }
+  }
+
   return SDValue();
 }
 
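The upper bits need not come from an explicit zext: DAG.MaskedValueIsZero also proves them zero through known-bits analysis, for example after an and with a 32-bit mask. A minimal sketch matching test3 further down (only the %A and %B lines appear in the diff; the final ret is assumed):

define <2 x i64> @test3(i64 %arg) nounwind {
  %A = and i64 %arg, 1234567   ; upper 32 bits of %A are provably zero
  %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
  ret <2 x i64> %B
}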
@@ -4447,8 +4447,6 @@ let Predicates = [HasAVX512] in {
             (VMOV64toPQIZrr GR64:$src)>;
 
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
-  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-            (VMOVDI2PDIZrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIZrm addr:$src)>;
   def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4232,8 +4232,6 @@ let Predicates = [UseAVX] in {
 
   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
   // These instructions also write zeros in the high part of a 256-bit register.
-  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-            (VMOVDI2PDIrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIrm addr:$src)>;
   def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4246,8 +4244,6 @@ let Predicates = [UseSSE2] in {
 
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
             (MOV64toPQIrr GR64:$src)>;
-  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-            (MOVDI2PDIrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (MOVDI2PDIrm addr:$src)>;
 }
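Why the removed zextloadi64i32 patterns are dead: a 32-bit zero-extending load always has zero upper bits, so the new combine rewrites the v2i64 vzext_movl to the v4i32 form before instruction selection ever sees it, presumably leaving it to select through the surviving (v4i32 (X86vzload32 addr:$src)) patterns. A hypothetical trigger (function and value names assumed):

define <2 x i64> @load32_zext(i32* %p) {
  ; an i32 load zero-extended to i64 is exactly a zextloadi64i32 node
  %l = load i32, i32* %p
  %z = zext i32 %l to i64
  %v = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %v
}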
@@ -26,16 +26,23 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert0_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract0_i32_zext_insert0_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: extract0_i32_zext_insert0_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    xorps %xmm1, %xmm1
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract0_i32_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 0
   %z = zext i32 %e to i64
@@ -62,21 +69,23 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
 define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
 ; SSE2-LABEL: extract1_i32_zext_insert0_i64_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: extract1_i32_zext_insert0_i64_zero:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $1, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract1_i32_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 1
   %z = zext i32 %e to i64
@@ -105,21 +114,23 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
 define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
 ; SSE2-LABEL: extract2_i32_zext_insert0_i64_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: extract2_i32_zext_insert0_i64_zero:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $2, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract2_i32_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $2, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 2
   %z = zext i32 %e to i64
@@ -144,23 +155,14 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert0_i64_zero:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert0_i64_zero:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $3, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: extract3_i32_zext_insert0_i64_zero:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract3_i32_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 3
   %z = zext i32 %e to i64
@@ -387,13 +389,13 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; SSE-LABEL: extract0_i16_zext_insert0_i64_zero:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pextrw $0, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract0_i16_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpextrw $0, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 0
   %z = zext i16 %e to i64
@@ -423,13 +425,13 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; SSE-LABEL: extract1_i16_zext_insert0_i64_zero:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pextrw $1, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract1_i16_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 1
   %z = zext i16 %e to i64
@@ -459,13 +461,13 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; SSE-LABEL: extract2_i16_zext_insert0_i64_zero:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pextrw $2, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract2_i16_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 2
   %z = zext i16 %e to i64
@@ -493,13 +495,13 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; SSE-LABEL: extract3_i16_zext_insert0_i64_zero:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pextrw $3, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract3_i16_zext_insert0_i64_zero:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 3
   %z = zext i16 %e to i64
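The i16 tests change for the same reason: pextrw already zero-extends the element into the full 32-bit register, so every bit above bit 15 of the zext'd scalar is known zero, and the 64-bit movq %rax, %xmm0 can be replaced by a 32-bit movd %eax, %xmm0.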
@@ -55,7 +55,7 @@ define <16 x i8> @test5(<16 x i8> %V) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    movd %eax, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rax)
 ; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
 ; CHECK-NEXT:    movaps %xmm1, (%rax)
@@ -12,7 +12,7 @@ define <2 x i64> @test1() nounwind {
 ; X64-LABEL: test1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    retq
   ret <2 x i64> < i64 1, i64 0 >
 }
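Constant vectors take the same path: lowering <i64 1, i64 0> produces a vzext_movl of a scalar_to_vector of the i64 constant 1, whose upper 32 bits are trivially zero, so only the GPR-to-XMM copy changes, from movq %rax to movd %eax.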
@@ -20,7 +20,7 @@ define <2 x i64> @test3(i64 %arg) nounwind {
 ; X64-LABEL: test3:
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT:    movq %rdi, %xmm0
+; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    retq
   %A = and i64 %arg, 1234567
   %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
@@ -1667,19 +1667,19 @@ define <2 x i64> @foldv2i64() nounwind {
 ; SSE-LABEL: foldv2i64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl $55, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; NOBW-LABEL: foldv2i64:
 ; NOBW:       # %bb.0:
 ; NOBW-NEXT:    movl $55, %eax
-; NOBW-NEXT:    vmovq %rax, %xmm0
+; NOBW-NEXT:    vmovd %eax, %xmm0
 ; NOBW-NEXT:    retq
 ;
 ; AVX512VLBWDQ-LABEL: foldv2i64:
 ; AVX512VLBWDQ:       # %bb.0:
 ; AVX512VLBWDQ-NEXT:    movl $55, %eax
-; AVX512VLBWDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VLBWDQ-NEXT:    vmovd %eax, %xmm0
 ; AVX512VLBWDQ-NEXT:    retq
 ;
 ; X32-SSE-LABEL: foldv2i64:
@@ -1695,19 +1695,19 @@ define <2 x i64> @foldv2i64u() nounwind {
 ; SSE-LABEL: foldv2i64u:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl $55, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; NOBW-LABEL: foldv2i64u:
 ; NOBW:       # %bb.0:
 ; NOBW-NEXT:    movl $55, %eax
-; NOBW-NEXT:    vmovq %rax, %xmm0
+; NOBW-NEXT:    vmovd %eax, %xmm0
 ; NOBW-NEXT:    retq
 ;
 ; AVX512VLBWDQ-LABEL: foldv2i64u:
 ; AVX512VLBWDQ:       # %bb.0:
 ; AVX512VLBWDQ-NEXT:    movl $55, %eax
-; AVX512VLBWDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VLBWDQ-NEXT:    vmovd %eax, %xmm0
 ; AVX512VLBWDQ-NEXT:    retq
 ;
 ; X32-SSE-LABEL: foldv2i64u:
@@ -140,19 +140,12 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
 }
 
 define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
-; AVX512F-LABEL: shuffle_v8f64_70000000:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    movl $7, %eax
-; AVX512F-NEXT:    vmovq %rax, %xmm1
-; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT:    retq
-;
-; AVX512F-32-LABEL: shuffle_v8f64_70000000:
-; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    movl $7, %eax
-; AVX512F-32-NEXT:    vmovd %eax, %xmm1
-; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-32-NEXT:    retl
+; ALL-LABEL: shuffle_v8f64_70000000:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl $7, %eax
+; ALL-NEXT:    vmovd %eax, %xmm1
+; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
 }
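With both configurations now materializing the <7,0,...> permute-index vector via movl $7 plus vmovd, the 64-bit and 32-bit AVX512F outputs differ only in the return instruction, so the check prefixes (presumably regenerated with update_llc_test_checks.py) collapse into a single ALL block ending in ret{{[l|q]}}.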
@@ -984,19 +977,12 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
 
 define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
 ;
-; AVX512F-LABEL: shuffle_v8i64_70000000:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    movl $7, %eax
-; AVX512F-NEXT:    vmovq %rax, %xmm1
-; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT:    retq
-;
-; AVX512F-32-LABEL: shuffle_v8i64_70000000:
-; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    movl $7, %eax
-; AVX512F-32-NEXT:    vmovd %eax, %xmm1
-; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-32-NEXT:    retl
+; ALL-LABEL: shuffle_v8i64_70000000:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl $7, %eax
+; ALL-NEXT:    vmovd %eax, %xmm1
+; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
 }
@@ -1577,37 +1577,37 @@ define <2 x i64> @foldv2i64() nounwind {
 ; SSE-LABEL: foldv2i64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl $8, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: foldv2i64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl $8, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: foldv2i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    movl $8, %eax
-; AVX512VPOPCNTDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovd %eax, %xmm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: foldv2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    movl $8, %eax
-; AVX512VPOPCNTDQVL-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovd %eax, %xmm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: foldv2i64:
 ; BITALG_NOVLX:       # %bb.0:
 ; BITALG_NOVLX-NEXT:    movl $8, %eax
-; BITALG_NOVLX-NEXT:    vmovq %rax, %xmm0
+; BITALG_NOVLX-NEXT:    vmovd %eax, %xmm0
 ; BITALG_NOVLX-NEXT:    retq
 ;
 ; BITALG-LABEL: foldv2i64:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    movl $8, %eax
-; BITALG-NEXT:    vmovq %rax, %xmm0
+; BITALG-NEXT:    vmovd %eax, %xmm0
 ; BITALG-NEXT:    retq
 ;
 ; X32-SSE-LABEL: foldv2i64:
@@ -1623,37 +1623,37 @@ define <2 x i64> @foldv2i64u() nounwind {
 ; SSE-LABEL: foldv2i64u:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl $8, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: foldv2i64u:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl $8, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: foldv2i64u:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    movl $8, %eax
-; AVX512VPOPCNTDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovd %eax, %xmm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: foldv2i64u:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    movl $8, %eax
-; AVX512VPOPCNTDQVL-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovd %eax, %xmm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: foldv2i64u:
 ; BITALG_NOVLX:       # %bb.0:
 ; BITALG_NOVLX-NEXT:    movl $8, %eax
-; BITALG_NOVLX-NEXT:    vmovq %rax, %xmm0
+; BITALG_NOVLX-NEXT:    vmovd %eax, %xmm0
 ; BITALG_NOVLX-NEXT:    retq
 ;
 ; BITALG-LABEL: foldv2i64u:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    movl $8, %eax
-; BITALG-NEXT:    vmovq %rax, %xmm0
+; BITALG-NEXT:    vmovd %eax, %xmm0
 ; BITALG-NEXT:    retq
 ;
 ; X32-SSE-LABEL: foldv2i64u: