forked from OSchip/llvm-project
[X86][SSE] Add faux shuffle combining support for PACKUS
llvm-svn: 314631
This commit is contained in:
parent
4f255ad6a0
commit
df23a2700d
|
@ -5931,7 +5931,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
|
||||
return true;
|
||||
}
|
||||
case X86ISD::PACKSS: {
|
||||
case X86ISD::PACKSS:
|
||||
case X86ISD::PACKUS: {
|
||||
SDValue N0 = N.getOperand(0);
|
||||
SDValue N1 = N.getOperand(1);
|
||||
assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
|
||||
|
@ -5940,9 +5941,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
|
||||
// If we know input saturation won't happen we can treat this
|
||||
// as a truncation shuffle.
|
||||
if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
|
||||
DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
|
||||
return false;
|
||||
if (Opcode == X86ISD::PACKSS) {
|
||||
if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
|
||||
DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
|
||||
return false;
|
||||
} else {
|
||||
KnownBits Known0, Known1;
|
||||
DAG.computeKnownBits(N0, Known0);
|
||||
if (Known0.countMinLeadingZeros() < NumBitsPerElt)
|
||||
return false;
|
||||
DAG.computeKnownBits(N1, Known1);
|
||||
if (Known1.countMinLeadingZeros() < NumBitsPerElt)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsUnary = (N0 == N1);
|
||||
unsigned Offset = IsUnary ? 0 : NumElts;
|
||||
|
|
|
@ -846,16 +846,12 @@ declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readn
|
|||
define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; X32-LABEL: shuffle_combine_packusdw_pshufb:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shuffle_combine_packusdw_pshufb:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
|
||||
; X64-NEXT: retq
|
||||
%1 = lshr <8 x i32> %a0, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
%2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
|
||||
|
@ -867,18 +863,12 @@ declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readno
|
|||
define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; X32-LABEL: shuffle_combine_packuswb_pshufb:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shuffle_combine_packuswb_pshufb:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
|
||||
; X64-NEXT: retq
|
||||
%1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
|
|
|
@ -683,18 +683,12 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea
|
|||
define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
; SSE-LABEL: shuffle_combine_packuswb_pshufb:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: psrlw $8, %xmm0
|
||||
; SSE-NEXT: psrlw $8, %xmm1
|
||||
; SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
|
||||
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_combine_packuswb_pshufb:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
|
||||
; AVX-NEXT: retq
|
||||
%1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
|
|
Loading…
Reference in New Issue