[X86][SSE] Enable post-legalize vXi64 shuffle combining on 32-bit targets

Long ago (2010, according to svn blame), combineShuffle probably needed to prevent the accidental creation of illegal i64 types, but there no longer appear to be any combines that can cause this, as they all have their own legality checks.

Differential Revision: https://reviews.llvm.org/D30213

llvm-svn: 296966
Simon Pilgrim 2017-03-04 12:50:47 +00:00
parent 065c1abc5e
commit 40a0e66b37
5 changed files with 10 additions and 35 deletions
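
For context, a minimal sketch of the kind of shuffle this affects (a hypothetical reduced case, not one of the changed tests below): on a 32-bit target the i64 element type is illegal, so the early-out removed in the first hunk made combineShuffle give up on vXi64 shuffles once type legalization had started. With the check gone, shuffles like this can also be combined after legalization, as the updated tests show.

; Hypothetical reduced case; built with something like llc -mtriple=i686-unknown -mattr=+sse2.
; The shuffle type is v2i64, whose i64 element type is not legal on 32-bit x86,
; so the removed early-out previously blocked post-legalization shuffle combining here.
define <2 x i64> @swap_v2i64(<2 x i64> %a) {
  %s = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %s
}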

@@ -28380,12 +28380,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))

@@ -16,8 +16,7 @@ define <8 x i16> @test2(<4 x i16>* %v) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retl
%v9 = load <4 x i16>, <4 x i16> * %v, align 8
%v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>

@@ -12,7 +12,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-SSE-NEXT: andl $-128, %esp
; X32-SSE-NEXT: subl $384, %esp # imm = 0x180
; X32-SSE-NEXT: movl 88(%ebp), %ecx
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0
; X32-SSE-NEXT: xorps %xmm1, %xmm1
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
@@ -21,7 +21,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)

@@ -499,7 +499,7 @@ define <4 x i64> @combine_pshufb_as_zext128(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_zext128:
; X32: # BB#0:
; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; X32-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14],zero,zero,zero,zero,zero,zero,ymm0[13,12],zero,zero,zero,zero,zero,zero,ymm0[31,30],zero,zero,zero,zero,zero,zero,ymm0[29,28],zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;

@@ -66,10 +66,6 @@ define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x d
define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_permvar_8i64_identity:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_8i64_identity:
@@ -189,10 +185,6 @@ define <8 x double> @combine_vpermt2var_8f64_movddup_mask(<8 x double> %x0, <8 x
define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermt2var_8i64_identity:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_identity:
@@ -648,8 +640,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
; X32-LABEL: combine_permvar_as_vpbroadcastq512:
; X32: # BB#0:
; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: vbroadcastsd %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_as_vpbroadcastq512:
@@ -663,8 +654,7 @@ define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_permvar_8i64_as_permq:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_8i64_as_permq:
@@ -679,8 +669,7 @@ define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
; X32-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
; X32-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
; X32-NEXT: vmovdqa64 %zmm1, %zmm0
; X32-NEXT: retl
;
@@ -872,10 +861,6 @@ define <8 x double> @combine_vpermi2var_8f64_as_shufpd(<8 x double> %x0, <8 x do
define <8 x i64> @combine_vpermi2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermi2var_8i64_identity:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_8i64_identity:
@@ -973,10 +958,8 @@ define <8 x double> @combine_vpermi2var_8f64_as_vpermpd(<8 x double> %x0, <8 x d
define <8 x i64> @combine_vpermt2var_8i64_as_vpermq(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermt2var_8i64_as_vpermq:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [12,0,5,0,14,0,7,0,8,0,1,0,10,0,3,0]
; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_as_vpermq:
@@ -1136,9 +1119,7 @@ define <16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x floa
define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
; X32-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X32: # BB#0:
; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64: