forked from OSchip/llvm-project
[X86 CodeGen] Optimization of ZeroExtendLoad for v2i8 vector
Loads with zero-extend and sign-extend from v2i8 to v2i32 have been "Legal" since SSE4.1 and may be performed using the PMOVZXBD and PMOVSXBD instructions. llvm-svn: 313121
This commit is contained in:
parent
6570c1d8b8
commit
6cab129464
|
@ -941,6 +941,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
|
||||
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
|
||||
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
|
||||
setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
|
||||
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
|
||||
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
|
||||
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
|
||||
|
|
|
@ -3089,45 +3089,12 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
|
|||
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; VEX: # BB#0:
|
||||
; VEX-NEXT: movzwl (%rdi), %eax
|
||||
; VEX-NEXT: vmovd %eax, %xmm0
|
||||
; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512F-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512DQ-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512VLDQ: # BB#0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
; AVX-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load <2 x i8>, <2 x i8> *%a
|
||||
%cvt = uitofp <2 x i8> %ld to <2 x double>
|
||||
ret <2 x double> %cvt
|
||||
|
|
|
@ -5079,3 +5079,51 @@ define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
|
|||
%b = sext <32 x i1> %a to <32 x i8>
|
||||
ret <32 x i8> %b
|
||||
}
|
||||
|
||||
define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
|
||||
; SSE2-LABEL: sext_2i8_to_2i32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzwl (%rdi), %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: psrad $24, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; SSE2-NEXT: paddq %xmm0, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_2i8_to_2i32:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movzwl (%rdi), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: psrad $24, %xmm0
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; SSSE3-NEXT: paddq %xmm0, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_2i8_to_2i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
|
||||
; SSE41-NEXT: paddq %xmm0, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sext_2i8_to_2i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
|
||||
; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_2i8_to_2i32:
|
||||
; X32-SSE41: # BB#0:
|
||||
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
|
||||
; X32-SSE41-NEXT: paddq %xmm0, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
%x = load <2 x i8>, <2 x i8>* %addr, align 1
|
||||
%y = sext <2 x i8> %x to <2 x i32>
|
||||
%z = add <2 x i32>%y, %y
|
||||
ret <2 x i32>%z
|
||||
}
|
||||
|
||||
|
|
|
@ -2262,19 +2262,13 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
|
|||
;
|
||||
; SSE41-LABEL: zext_2i8_to_2i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movzwl (%rdi), %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: paddq %xmm0, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: zext_2i8_to_2i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i8>, <2 x i8>* %addr, align 1
|
||||
|
|
Loading…
Reference in New Issue