[X86] CombineShuffleWithExtract - only require 1 source to be EXTRACT_SUBVECTOR

We were requiring that both shuffle operands were EXTRACT_SUBVECTORs, but we can relax this to only require one of them to be.

Also, we shouldn't bother attempting this if both operands come from the lowest subvector (or are not EXTRACT_SUBVECTORs at all).

llvm-svn: 364644
Simon Pilgrim 2019-06-28 12:24:49 +00:00
parent 9dbdfe6b78
commit a54e1a0f01
4 changed files with 38 additions and 45 deletions
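
For illustration only, here is a minimal standalone sketch of the relaxed check described above. It is not the SelectionDAG code itself; the ShuffleOperand struct and shouldCombineWithExtract are hypothetical names invented for this example. The idea: an operand that is not an EXTRACT_SUBVECTOR is simply treated as an extract from the lowest subvector (offset 0), and the combine is only worth attempting when at least one offset is non-zero.

// Toy model of the relaxed requirement; this is NOT the DAG combine itself.
#include <cstdio>
#include <optional>

struct ShuffleOperand {
  // Set if the operand is an EXTRACT_SUBVECTOR with a constant element index;
  // empty means the operand is used directly (equivalent to offset 0).
  std::optional<unsigned> ExtractOffset;
};

static bool shouldCombineWithExtract(const ShuffleOperand &V1,
                                     const ShuffleOperand &V2,
                                     bool UnaryShuffle) {
  if (UnaryShuffle)
    return false;
  // Only one operand has to be an extract; the other defaults to offset 0.
  unsigned Offset1 = V1.ExtractOffset.value_or(0);
  unsigned Offset2 = V2.ExtractOffset.value_or(0);
  // If both operands come from the lowest subvector there is nothing to gain
  // from widening the shuffle, so don't bother.
  return !(Offset1 == 0 && Offset2 == 0);
}

int main() {
  ShuffleOperand Plain{std::nullopt}; // not an EXTRACT_SUBVECTOR
  ShuffleOperand Upper{16u};          // extract starting at element 16
  std::printf("plain + upper extract -> %d\n",
              shouldCombineWithExtract(Plain, Upper, /*UnaryShuffle=*/false));
  std::printf("plain + plain         -> %d\n",
              shouldCombineWithExtract(Plain, Plain, /*UnaryShuffle=*/false));
  return 0;
}

Running this prints 1 for the first case and 0 for the second, matching the early-out added in the patch below.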

@@ -32042,16 +32042,26 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       [&](SDValue &NewRoot, SmallVectorImpl<int> &NewMask,
           SmallVectorImpl<SDValue> &NewInputs) -> bool {
         assert(NewMask.empty() && NewInputs.empty() && "Non-empty shuffle mask");
-        if (UnaryShuffle || V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
-            V2.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
-            !isa<ConstantSDNode>(V1.getOperand(1)) ||
-            !isa<ConstantSDNode>(V2.getOperand(1)))
+        if (UnaryShuffle)
           return false;

+        SDValue Src1 = V1, Src2 = V2;
+        unsigned Offset1 = 0, Offset2 = 0;
+        if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+            isa<ConstantSDNode>(V1.getOperand(1))) {
+          Src1 = V1.getOperand(0);
+          Offset1 = V1.getConstantOperandVal(1);
+        }
+        if (V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+            isa<ConstantSDNode>(V2.getOperand(1))) {
+          Src2 = V2.getOperand(0);
+          Offset2 = V2.getConstantOperandVal(1);
+        }
+        if (Offset1 == 0 && Offset2 == 0)
+          return false;
+
         // If the src vector types aren't the same, see if we can extend
         // one to match the other.
-        SDValue Src1 = V1.getOperand(0);
-        SDValue Src2 = V2.getOperand(0);
         if ((Src1.getValueType().getScalarType() !=
              Src2.getValueType().getScalarType()) ||
             !DAG.getTargetLoweringInfo().isTypeLegal(Src1.getValueType()) ||
@@ -32075,8 +32085,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
           }
         }

-        unsigned Offset1 = V1.getConstantOperandVal(1);
-        unsigned Offset2 = V2.getConstantOperandVal(1);
         assert(((Offset1 % VT1.getVectorNumElements()) == 0 &&
                 (Offset2 % VT2.getVectorNumElements()) == 0 &&
                 (Src1SizeInBits % RootSizeInBits) == 0 &&

@@ -716,18 +716,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ;
 ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq
 ;
@@ -813,11 +809,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
 ; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq

@@ -707,18 +707,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ;
 ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq
 ;
@@ -804,11 +800,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
 ; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq

@@ -2994,9 +2994,8 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_
 ;
 ; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
 ; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32,15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32]
-; AVX512VLVBMI-NEXT: # ymm2 = mem[0,1,0,1]
-; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
+; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
+; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
 ; AVX512VLVBMI-NEXT: retq
 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ret <32 x i8> %shuffle