[X86][SSE] Enable post-legalize vXi64 shuffle combining on 32-bit targets
Long ago (2010 according to svn blame), combineShuffle probably needed to prevent the accidental creation of illegal i64 types, but there no longer appear to be any combines that can cause this, as they all have their own legality checks.

Differential Revision: https://reviews.llvm.org/D30213

llvm-svn: 296966
commit 40a0e66b37
parent 065c1abc5e
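Context (not part of the commit message): on 32-bit targets i64 is not a legal scalar type, so the early-out removed below disabled every post-legalization combine on vXi64 shuffles. A minimal hypothetical reproducer, assuming something like llc -mtriple=i686-unknown -mattr=+sse2:

; Moves the upper i64 of %a into lane 0 and zeros the upper lane; with the
; combine enabled, this kind of shuffle can now fold to a single psrldq on
; 32-bit targets as well.
define <2 x i64> @shift_in_zero(<2 x i64> %a) {
  %r = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %r
}
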
@@ -28380,12 +28380,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
   SDLoc dl(N);
   EVT VT = N->getValueType(0);
-
-  // Don't create instructions with illegal types after legalize types has run.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
-    return SDValue();
-
   // If we have legalized the vector types, look for blends of FADD and FSUB
   // nodes that we can fuse into an ADDSUB node.
   if (TLI.isTypeLegal(VT))
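The comment retained above refers to the post-legalization ADDSUB fusion, which matches blends of FADD and FSUB such as the following IR (an illustrative sketch with a hypothetical function name, not part of the patch):

; fsub feeds the even lanes and fadd the odd lanes, which is exactly the
; addsubps lane pattern, so the blend can fuse into an X86ISD::ADDSUB node.
define <4 x float> @fadd_fsub_blend(<4 x float> %a, <4 x float> %b) {
  %add = fadd <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %b
  %r = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %r
}
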
@@ -16,8 +16,7 @@ define <8 x i16> @test2(<4 x i16>* %v) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: retl
   %v9 = load <4 x i16>, <4 x i16> * %v, align 8
   %v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -12,7 +12,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
 ; X32-SSE-NEXT: andl $-128, %esp
 ; X32-SSE-NEXT: subl $384, %esp # imm = 0x180
 ; X32-SSE-NEXT: movl 88(%ebp), %ecx
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0
 ; X32-SSE-NEXT: xorps %xmm1, %xmm1
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
@@ -21,7 +21,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
 ; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
@@ -499,7 +499,7 @@ define <4 x i64> @combine_pshufb_as_zext128(<32 x i8> %a0) {
 ; X32-LABEL: combine_pshufb_as_zext128:
 ; X32: # BB#0:
 ; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; X32-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14],zero,zero,zero,zero,zero,zero,ymm0[13,12],zero,zero,zero,zero,zero,zero,ymm0[31,30],zero,zero,zero,zero,zero,zero,ymm0[29,28],zero,zero,zero,zero,zero,zero
 ; X32-NEXT: retl
 ;
@@ -66,10 +66,6 @@ define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x d
 define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
 ; X32-LABEL: combine_permvar_8i64_identity:
 ; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_permvar_8i64_identity:
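On 32-bit targets the i64 indices are spelled as i32 pairs (hence masks like [7,0,6,0,...]), but the effect is the same as on 64-bit: the two permutes compose to the identity, so once vXi64 shuffle combining is allowed the X32 body collapses to a bare retl. A reduced sketch of the same fold in plain IR (hypothetical function, not the intrinsic-based test):

; Reversing <8 x i64> twice is the identity, so both shuffles combine away.
define <8 x i64> @double_reverse(<8 x i64> %x) {
  %r1 = shufflevector <8 x i64> %x, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %r2 = shufflevector <8 x i64> %r1, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %r2
}
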
@@ -189,10 +185,6 @@ define <8 x double> @combine_vpermt2var_8f64_movddup_mask(<8 x double> %x0, <8 x
 define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
 ; X32-LABEL: combine_vpermt2var_8i64_identity:
 ; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
-; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
-; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_vpermt2var_8i64_identity:
@@ -648,8 +640,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
 define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
 ; X32-LABEL: combine_permvar_as_vpbroadcastq512:
 ; X32: # BB#0:
-; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; X32-NEXT: vbroadcastsd %xmm0, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_permvar_as_vpbroadcastq512:
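An all-zero index vector makes the permvar a splat of element 0, so the zero-vector materialization and the variable permute fold to a single broadcast. The plain-IR shape of the fold (a hypothetical reduction, assuming an AVX-512 target):

; A zeroinitializer shuffle mask splats element 0; on AVX-512 this lowers
; to a single vbroadcastsd/vpbroadcastq.
define <8 x i64> @splat_elt0(<8 x i64> %x) {
  %r = shufflevector <8 x i64> %x, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %r
}
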
@@ -663,8 +654,7 @@ define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
 define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
 ; X32-LABEL: combine_permvar_8i64_as_permq:
 ; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; X32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_permvar_8i64_as_permq:
@@ -679,8 +669,7 @@ define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x
 ; X32: # BB#0:
 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
-; X32-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
+; X32-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
 ; X32-NEXT: vmovdqa64 %zmm1, %zmm0
 ; X32-NEXT: retl
 ;
@@ -872,10 +861,6 @@ define <8 x double> @combine_vpermi2var_8f64_as_shufpd(<8 x double> %x0, <8 x do
 define <8 x i64> @combine_vpermi2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
 ; X32-LABEL: combine_vpermi2var_8i64_identity:
 ; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
-; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
-; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_vpermi2var_8i64_identity:
|
@ -973,10 +958,8 @@ define <8 x double> @combine_vpermi2var_8f64_as_vpermpd(<8 x double> %x0, <8 x d
|
|||
define <8 x i64> @combine_vpermt2var_8i64_as_vpermq(<8 x i64> %x0, <8 x i64> %x1) {
|
||||
; X32-LABEL: combine_vpermt2var_8i64_as_vpermq:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
|
||||
; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [12,0,5,0,14,0,7,0,8,0,1,0,10,0,3,0]
|
||||
; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8i64_as_vpermq:
|
||||
|
@ -1136,9 +1119,7 @@ define <16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x floa
|
|||
define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
|
||||
; X32-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
|
||||
; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
|
||||
|
|