[X86] Custom type legalize v2i32/v4i16/v8i8->i64 bitcasts in 64-bit mode, similar to what's done when the destination is f64.

The generic legalizer will fall back to a stack spill that uses a truncating store. That store will get expanded into a shuffle and a non-truncating store on pre-AVX512 targets. Once that happens, the stack store/load pair will be combined away, leaving behind the shuffle and bitcasts. On AVX512 targets the truncating store is legal, so it doesn't get folded away. By custom legalizing this bitcast we can avoid the churn and maybe produce better code.

llvm-svn: 348085
commit ec096a1dae (parent 0ff50d49d1)
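For orientation: the operation being legalized is a pure bit-reinterpretation. An LLVM `bitcast <2 x i32> %v to i64` returns the same 64 bits under a different type, so it should ideally lower to a single register move rather than a stack round trip. A minimal C++ sketch of the semantics (the function name is a hypothetical stand-in, not LLVM code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Semantics of `bitcast <2 x i32> to i64`: the same 64 bits, reinterpreted.
    // bitcast_v2i32_to_i64 is an illustrative stand-in for the DAG-level node.
    static uint64_t bitcast_v2i32_to_i64(const uint32_t v[2]) {
      uint64_t r;
      std::memcpy(&r, v, sizeof r);  // ideally a single movq, no stack traffic
      return r;
    }

    int main() {
      const uint32_t v[2] = {0x11223344u, 0x55667788u};
      // On little-endian x86-64 this prints 0x5566778811223344.
      std::printf("%#llx\n",
                  static_cast<unsigned long long>(bitcast_v2i32_to_i64(v)));
      return 0;
    }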
@@ -25221,7 +25221,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
   if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
       SrcVT == MVT::i64) {
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
-    if (DstVT != MVT::f64)
+    if (DstVT != MVT::f64 && DstVT != MVT::i64)
       // This conversion needs to be expanded.
       return SDValue();

@@ -25253,8 +25253,9 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
   EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
   SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
-  SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+  MVT V2X64VT = MVT::getVectorVT(DstVT, 2);
+  SDValue ToV2X64 = DAG.getBitcast(V2X64VT, BV);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, ToV2X64,
                      DAG.getIntPtrConstant(0, dl));
 }
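The two hunks above make an i64 destination take the same path previously reserved for f64: widen the source to a 128-bit BUILD_VECTOR, bitcast it to a two-element 64-bit vector of the destination type, and extract element 0. A data-level sketch of that flow, modeled on plain arrays rather than SelectionDAG nodes (names are illustrative; the undef padding elements are modeled as zero):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Mirrors the widen -> bitcast -> extract shape of the hunk above.
    static uint64_t widen_bitcast_extract(const uint32_t src[2]) {
      uint32_t widened[4] = {src[0], src[1], 0, 0};    // BUILD_VECTOR, NumElts * 2
      uint64_t as_v2x64[2];
      std::memcpy(as_v2x64, widened, sizeof as_v2x64); // bitcast to v2i64
      return as_v2x64[0];                              // EXTRACT_VECTOR_ELT 0
    }

    int main() {
      const uint32_t v[2] = {1u, 2u};
      // Prints 0x200000001 on little-endian x86-64.
      std::printf("%#llx\n",
                  static_cast<unsigned long long>(widen_bitcast_extract(v)));
      return 0;
    }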
@@ -19,7 +19,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ; X64-LABEL: mmx_movzl:
 ; X64: ## %bb.0:
 ; X64-NEXT: movl $32, %eax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movd %eax, %xmm0
 ; X64-NEXT: retq
 %tmp = bitcast x86_mmx %x to <2 x i32>
 %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
@@ -13,10 +13,8 @@ define x86_mmx @t0(i32 %A) nounwind {
 ;
 ; X64-LABEL: t0:
 ; X64: ## %bb.0:
-; X64-NEXT: ## kill: def $edi killed $edi def $rdi
-; X64-NEXT: movq %rdi, %xmm0
-; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 ; X64-NEXT: retq
 %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
 %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
@@ -22,224 +22,63 @@ define float @cvt_i16_to_f32(i16 %a0) nounwind {
 }

 define <4 x float> @cvt_4i16_to_4f32(<4 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_4i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_4i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_4i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_4i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
 %1 = bitcast <4 x i16> %a0 to <4 x half>
 %2 = fpext <4 x half> %1 to <4 x float>
 ret <4 x float> %2
 }

 define <4 x float> @cvt_8i16_to_4f32(<8 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_8i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_8i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_8i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 %2 = bitcast <4 x i16> %1 to <4 x half>
 %3 = fpext <4 x half> %2 to <4 x float>
@@ -730,111 +569,31 @@ define <4 x float> @load_cvt_4i16_to_4f32(<4 x i16>* %a0) nounwind {
 }

 define <4 x float> @load_cvt_8i16_to_4f32(<8 x i16>* %a0) nounwind {
-; AVX1-LABEL: load_cvt_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movq (%rdi), %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: vmovd %esi, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movq (%rdi), %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: vmovd %esi, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: load_cvt_8i16_to_4f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movq (%rdi), %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movq %rax, %rdx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512F-NEXT: shrl $16, %eax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrq $48, %rdx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: vmovd %esi, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: load_cvt_8i16_to_4f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movq %rax, %rdx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrq $48, %rdx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: vmovd %esi, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: load_cvt_8i16_to_4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: movq (%rdi), %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movq %rax, %rdx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: # kill: def $eax killed $eax killed $rax
+; ALL-NEXT: shrl $16, %eax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrq $48, %rdx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: vmovd %esi, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; ALL-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %a0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 %3 = bitcast <4 x i16> %2 to <4 x half>
@@ -1261,125 +1020,35 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind {
 }

 define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_4i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_4i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_4i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_4i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
 %1 = bitcast <4 x i16> %a0 to <4 x half>
 %2 = fpext <4 x half> %1 to <4 x double>
 ret <4 x double> %2
@@ -1454,123 +1123,34 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind {
 }

 define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_8i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_8i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_8i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_8i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 %2 = bitcast <4 x i16> %1 to <4 x half>
 %3 = fpext <4 x half> %2 to <4 x double>
@@ -1812,123 +1392,34 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind {
 }

 define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind {
-; AVX1-LABEL: load_cvt_8i16_to_4f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movq (%rdi), %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: movswl %ax, %esi
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movswl %dx, %edx
-; AVX1-NEXT: vmovd %edx, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %esi, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm3
-; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_8i16_to_4f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movq (%rdi), %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movl %eax, %edx
-; AVX2-NEXT: movswl %ax, %esi
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrl $16, %edx
-; AVX2-NEXT: movswl %dx, %edx
-; AVX2-NEXT: vmovd %edx, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %esi, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX2-NEXT: movswl %cx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: load_cvt_8i16_to_4f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movq (%rdi), %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: movswl %ax, %esi
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrq $32, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: movswl %dx, %edx
-; AVX512F-NEXT: vmovd %edx, %xmm0
-; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %esi, %xmm1
-; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512F-NEXT: movswl %cx, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512F-NEXT: cwtl
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: load_cvt_8i16_to_4f64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: movq %rax, %rcx
-; AVX512VL-NEXT: movl %eax, %edx
-; AVX512VL-NEXT: movswl %ax, %esi
-; AVX512VL-NEXT: shrq $48, %rax
-; AVX512VL-NEXT: shrq $32, %rcx
-; AVX512VL-NEXT: shrl $16, %edx
-; AVX512VL-NEXT: movswl %dx, %edx
-; AVX512VL-NEXT: vmovd %edx, %xmm0
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %esi, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
-; AVX512VL-NEXT: movswl %cx, %ecx
-; AVX512VL-NEXT: vmovd %ecx, %xmm2
-; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512VL-NEXT: cwtl
-; AVX512VL-NEXT: vmovd %eax, %xmm3
-; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: load_cvt_8i16_to_4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: movq (%rdi), %rax
+; ALL-NEXT: movq %rax, %rcx
+; ALL-NEXT: movl %eax, %edx
+; ALL-NEXT: movswl %ax, %esi
+; ALL-NEXT: shrq $48, %rax
+; ALL-NEXT: shrq $32, %rcx
+; ALL-NEXT: shrl $16, %edx
+; ALL-NEXT: movswl %dx, %edx
+; ALL-NEXT: vmovd %edx, %xmm0
+; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
+; ALL-NEXT: vmovd %esi, %xmm1
+; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
+; ALL-NEXT: movswl %cx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vcvtph2ps %xmm2, %xmm2
+; ALL-NEXT: cwtl
+; ALL-NEXT: vmovd %eax, %xmm3
+; ALL-NEXT: vcvtph2ps %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
+; ALL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %a0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 %3 = bitcast <4 x i16> %2 to <4 x half>
@@ -1639,29 +1639,11 @@ define i64 @trunc2i64_i64(<2 x i64> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: trunc2i64_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc2i64_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc2i64_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc2i64_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc2i64_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <2 x i64> %inval to <2 x i32>
 %1 = bitcast <2 x i32> %0 to i64
@@ -1746,29 +1728,11 @@ define i64 @trunc4i32_i64(<4 x i32> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: trunc4i32_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc4i32_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc4i32_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc4i32_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc4i32_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <4 x i32> %inval to <4 x i16>
 %1 = bitcast <4 x i16> %0 to i64
@@ -1849,29 +1813,11 @@ define i64 @trunc8i16_i64(<8 x i16> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: trunc8i16_i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc8i16_i64:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vmovq %xmm0, %rax
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc8i16_i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc8i16_i64:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovwb %xmm0, -{{[0-9]+}}(%rsp)
-; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc8i16_i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i16> %inval to <8 x i8>
 %1 = bitcast <8 x i8> %0 to i64