diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e68a6ed2a84a..6abf46f470fe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5931,7 +5931,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } - case X86ISD::PACKSS: { + case X86ISD::PACKSS: + case X86ISD::PACKUS: { SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) && @@ -5940,9 +5941,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, // If we know input saturation won't happen we can treat this // as a truncation shuffle. - if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt || - DAG.ComputeNumSignBits(N1) <= NumBitsPerElt) - return false; + if (Opcode == X86ISD::PACKSS) { + if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt || + DAG.ComputeNumSignBits(N1) <= NumBitsPerElt) + return false; + } else { + KnownBits Known0, Known1; + DAG.computeKnownBits(N0, Known0); + if (Known0.countMinLeadingZeros() < NumBitsPerElt) + return false; + DAG.computeKnownBits(N1, Known1); + if (Known1.countMinLeadingZeros() < NumBitsPerElt) + return false; + } bool IsUnary = (N0 == N1); unsigned Offset = IsUnary ? 0 : NumElts; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 69a6eb7effab..f0c7ae38b6b1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -846,16 +846,12 @@ declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readn define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) { ; X32-LABEL: shuffle_combine_packusdw_pshufb: ; X32: # BB#0: -; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0 -; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19] ; X32-NEXT: retl ; ; X64-LABEL: shuffle_combine_packusdw_pshufb: ; X64: # BB#0: -; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0 -; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19] ; X64-NEXT: retq %1 = lshr <8 x i32> %a0, %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1) @@ -867,18 +863,12 @@ declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readno define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) { ; X32-LABEL: shuffle_combine_packuswb_pshufb: ; X32: # BB#0: -; X32-NEXT: vpsrlw $8, %ymm0, %ymm0 -; X32-NEXT: vpsrlw $8, %ymm1, %ymm1 -; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 -; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17] ; X32-NEXT: retl ; ; X64-LABEL: shuffle_combine_packuswb_pshufb: ; X64: # BB#0: -; X64-NEXT: vpsrlw $8, %ymm0, %ymm0 -; X64-NEXT: vpsrlw $8, %ymm1, %ymm1 -; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 -; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17] ; X64-NEXT: retq %1 = lshr <16 x i16> %a0, %2 = lshr <16 x i16> %a1, diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index 874d090794c7..3abf0570abf5 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -683,18 +683,12 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: shuffle_combine_packuswb_pshufb: ; SSE: # BB#0: -; SSE-NEXT: psrlw $8, %xmm0 -; SSE-NEXT: psrlw $8, %xmm1 -; SSE-NEXT: packuswb %xmm1, %xmm0 -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_combine_packuswb_pshufb: ; AVX: # BB#0: -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1] ; AVX-NEXT: retq %1 = lshr <8 x i16> %a0, %2 = lshr <8 x i16> %a1,