[X86][SSE] Disable ZERO_EXTEND shuffle combining

rL352997 enabled ZERO_EXTEND shuffle combining from non-shuffle-able value types, which caused a regression identified by @asbirlea. I've disabled it for now until I can fix this properly.

llvm-svn: 353198
commit b0afc69435 (parent 3d2df5ab83)
Author: Simon Pilgrim
Date: 2019-02-05 19:15:48 +00:00
3 changed files with 36 additions and 21 deletions

@@ -6793,8 +6793,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     Mask.append(NumElts, 0);
     return true;
   }
-  case ISD::ZERO_EXTEND_VECTOR_INREG:
-  case ISD::ZERO_EXTEND: {
+  case ISD::ZERO_EXTEND_VECTOR_INREG: {
+    // TODO: Handle ISD::ZERO_EXTEND
     SDValue Src = N.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
     unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
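
For context, getFauxShuffleMask lets the shuffle combiner treat certain non-shuffle nodes as if they were shuffles by synthesising a mask with known-zero lanes. A vector zero-extend fits this model: each source element lands in the low part of a wider lane and the remaining bits are zero. The sketch below is a minimal standalone illustration of that decoding, not the in-tree implementation; the helper name fauxZeroExtendMask and the SM_SentinelZero value are assumptions made for the example. rL352997 had extended the same idea from ZERO_EXTEND_VECTOR_INREG to plain ISD::ZERO_EXTEND, which this commit backs out.

```cpp
// Illustrative sketch (not the in-tree getFauxShuffleMask code): a vector
// zero-extend described as a shuffle mask in which each source element is
// followed by Scale-1 known-zero lanes. SM_SentinelZero is an assumed
// stand-in for the sentinel X86ISelLowering uses for "this lane is zero".
#include <cstdio>
#include <vector>

constexpr int SM_SentinelZero = -2; // assumed sentinel for known-zero lanes

// Build the faux mask for a zero-extend that widens NumSrcElts elements by
// a factor of Scale (e.g. v4i32 -> v4i64 has NumSrcElts = 4, Scale = 2).
std::vector<int> fauxZeroExtendMask(unsigned NumSrcElts, unsigned Scale) {
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumSrcElts; ++I) {
    Mask.push_back(static_cast<int>(I));                 // low part: source element I
    Mask.insert(Mask.end(), Scale - 1, SM_SentinelZero); // high parts: zero lanes
  }
  return Mask;
}

int main() {
  // v4i32 -> v4i64, viewed as an 8 x i32 shuffle: <0, Z, 1, Z, 2, Z, 3, Z>,
  // i.e. the xmm0[0],zero,xmm0[1],zero,... pattern vpmovzxdq prints below.
  for (int M : fauxZeroExtendMask(4, 2))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}
```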

@@ -1526,8 +1526,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
-; AVX512VL-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
+; AVX512VL-NEXT:    vpermi2d %ymm1, %ymm2, %ymm0
 ; AVX512VL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i32> %shuffle
@@ -1571,23 +1572,11 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: shuffle_v8i32_091b2d3f:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
-; AVX512VL-FAST-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2OR512VL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   ret <8 x i32> %shuffle
 }

@@ -760,3 +760,29 @@ entry:
   %shuf2 = shufflevector <8 x float> %inp1, <8 x float> %shuf1, <8 x i32> <i32 15, i32 10, i32 7, i32 2, i32 12, i32 undef, i32 3, i32 2>
   ret <8 x float> %shuf2
 }
+
+define void @packss_zext_v8i1() {
+; X86-LABEL: packss_zext_v8i1:
+; X86:       # %bb.0:
+; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovups %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: packss_zext_v8i1:
+; X64:       # %bb.0:
+; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovups %ymm0, (%rax)
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %tmp0 = icmp sgt <8 x i32> undef, undef
+  %tmp1 = zext <8 x i1> %tmp0 to <8 x i32>
+  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %tmp3 = trunc <16 x i32> %tmp2 to <16 x i16>
+  %tmp4 = add <16 x i16> zeroinitializer, %tmp3
+  %tmp6 = sext <16 x i16> %tmp4 to <16 x i32>
+  %tmp10 = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+  %tmp11 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> %tmp10)
+  store <16 x i16> %tmp11, <16 x i16>* undef, align 2
+  ret void
+}