forked from OSchip/llvm-project
[X86][SSE] Disable ZERO_EXTEND shuffle combining
rL352997 enabled shuffle combining of ZERO_EXTEND nodes, which also accepts sources with non-shuffle-able value types. I've disabled it for now to fix a regression identified by @asbirlea until I can fix this properly. llvm-svn: 353198
This commit is contained in:
parent
3d2df5ab83
commit
b0afc69435
|
@ -6793,8 +6793,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
Mask.append(NumElts, 0);
|
||||
return true;
|
||||
}
|
||||
case ISD::ZERO_EXTEND_VECTOR_INREG:
|
||||
case ISD::ZERO_EXTEND: {
|
||||
case ISD::ZERO_EXTEND_VECTOR_INREG: {
|
||||
// TODO: Handle ISD::ZERO_EXTEND
|
||||
SDValue Src = N.getOperand(0);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
|
||||
|
|
|
@ -1526,8 +1526,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
|
|||
;
|
||||
; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
|
||||
; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
|
||||
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
||||
ret <8 x i32> %shuffle
|
||||
|
@ -1571,23 +1572,11 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
|
|||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_091b2d3f:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f:
|
||||
; AVX512VL-SLOW: # %bb.0:
|
||||
; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX512VL-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-FAST-LABEL: shuffle_v8i32_091b2d3f:
|
||||
; AVX512VL-FAST: # %bb.0:
|
||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
|
||||
; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
|
||||
; AVX512VL-FAST-NEXT: retq
|
||||
; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
|
||||
; AVX2OR512VL: # %bb.0:
|
||||
; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2OR512VL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
|
|
@ -760,3 +760,29 @@ entry:
|
|||
%shuf2 = shufflevector <8 x float> %inp1, <8 x float> %shuf1, <8 x i32> <i32 15, i32 10, i32 7, i32 2, i32 12, i32 undef, i32 3, i32 2>
|
||||
ret <8 x float> %shuf2
|
||||
}
|
||||
|
||||
; Test case: a zext from <8 x i1> to <8 x i32> feeds a shufflevector / trunc /
; add / sext chain and then an AVX2 packssdw intrinsic call whose result is
; stored. The inputs are undef, and the checked X86 and X64 output is a zeroed
; register (vxorps) stored with vmovups.
; NOTE(review): presumably the reproducer for the ZERO_EXTEND shuffle-combining
; regression described in the commit message - confirm against the original report.
define void @packss_zext_v8i1() {
|
||||
; X86-LABEL: packss_zext_v8i1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovups %ymm0, (%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: packss_zext_v8i1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; X64-NEXT: vmovups %ymm0, (%rax)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%tmp0 = icmp sgt <8 x i32> undef, undef
|
||||
%tmp1 = zext <8 x i1> %tmp0 to <8 x i32>
|
||||
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%tmp3 = trunc <16 x i32> %tmp2 to <16 x i16>
|
||||
%tmp4 = add <16 x i16> zeroinitializer, %tmp3
|
||||
%tmp6 = sext <16 x i16> %tmp4 to <16 x i32>
|
||||
%tmp10 = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
|
||||
%tmp11 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> %tmp10)
|
||||
store <16 x i16> %tmp11, <16 x i16>* undef, align 2
|
||||
ret void
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue