[X86][SSE4A] Add support for shuffle combining to EXTRQ.

llvm-svn: 307254
This commit is contained in:
Simon Pilgrim 2017-07-06 12:22:58 +00:00
parent 21fb07b715
commit cc0f785dca
2 changed files with 40 additions and 25 deletions

View File

@ -9364,7 +9364,7 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
int Idx = -1;
for (int i = 0; i != Len; ++i) {
int M = Mask[i];
if (M < 0)
if (M == SM_SentinelUndef)
continue;
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
@ -27696,6 +27696,33 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
// Annoyingly, SSE4A instructions don't map into the above match helpers.
if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
APInt Zeroable(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (isUndefOrZero(Mask[i]))
Zeroable.setBit(i);
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
return false; // Nothing to do!
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
Res = DAG.getNode(X86ISD::EXTRQI, DL, ShuffleVT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)

View File

@ -9,30 +9,20 @@
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
; SSE-LABEL: combine_extrqi_pshufb_16i8:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_extrqi_pshufb_16i8:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
; ALL-LABEL: combine_extrqi_pshufb_16i8:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
ret <16 x i8> %2
}
define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
; SSE-LABEL: combine_extrqi_pshufb_8i16:
; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_extrqi_pshufb_8i16:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
; ALL-LABEL: combine_extrqi_pshufb_8i16:
; ALL: # BB#0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = bitcast <8 x i16> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@ -43,9 +33,8 @@ define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_16i8:
@ -65,9 +54,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_8i16: