forked from OSchip/llvm-project
[X86] CombineShuffleWithExtract - only require 1 source to be EXTRACT_SUBVECTOR
Previously, both shuffle operands had to be EXTRACT_SUBVECTOR nodes; this relaxes the requirement so that only one of them needs to be. Also, we no longer attempt the combine when each operand is either extracted from the lowest subvector (offset 0) or is not an EXTRACT_SUBVECTOR at all. llvm-svn: 364644
This commit is contained in:
parent
9dbdfe6b78
commit
a54e1a0f01
|
@ -32042,16 +32042,26 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
[&](SDValue &NewRoot, SmallVectorImpl<int> &NewMask,
|
||||
SmallVectorImpl<SDValue> &NewInputs) -> bool {
|
||||
assert(NewMask.empty() && NewInputs.empty() && "Non-empty shuffle mask");
|
||||
if (UnaryShuffle || V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
|
||||
V2.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
|
||||
!isa<ConstantSDNode>(V1.getOperand(1)) ||
|
||||
!isa<ConstantSDNode>(V2.getOperand(1)))
|
||||
if (UnaryShuffle)
|
||||
return false;
|
||||
|
||||
SDValue Src1 = V1, Src2 = V2;
|
||||
unsigned Offset1 = 0, Offset2 = 0;
|
||||
if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
isa<ConstantSDNode>(V1.getOperand(1))) {
|
||||
Src1 = V1.getOperand(0);
|
||||
Offset1 = V1.getConstantOperandVal(1);
|
||||
}
|
||||
if (V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
isa<ConstantSDNode>(V2.getOperand(1))) {
|
||||
Src2 = V2.getOperand(0);
|
||||
Offset2 = V2.getConstantOperandVal(1);
|
||||
}
|
||||
if (Offset1 == 0 && Offset2 == 0)
|
||||
return false;
|
||||
|
||||
// If the src vector types aren't the same, see if we can extend
|
||||
// one to match the other.
|
||||
SDValue Src1 = V1.getOperand(0);
|
||||
SDValue Src2 = V2.getOperand(0);
|
||||
if ((Src1.getValueType().getScalarType() !=
|
||||
Src2.getValueType().getScalarType()) ||
|
||||
!DAG.getTargetLoweringInfo().isTypeLegal(Src1.getValueType()) ||
|
||||
|
@ -32075,8 +32085,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
}
|
||||
}
|
||||
|
||||
unsigned Offset1 = V1.getConstantOperandVal(1);
|
||||
unsigned Offset2 = V2.getConstantOperandVal(1);
|
||||
assert(((Offset1 % VT1.getVectorNumElements()) == 0 &&
|
||||
(Offset2 % VT2.getVectorNumElements()) == 0 &&
|
||||
(Src1SizeInBits % RootSizeInBits) == 0 &&
|
||||
|
|
|
@ -716,18 +716,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
|
|||
;
|
||||
; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
|
||||
; AVX512VBMI: # %bb.0:
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321]
|
||||
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
;
|
||||
|
@ -813,11 +809,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
|
|||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
|
||||
; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257]
|
||||
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
|
|
|
@ -707,18 +707,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
|
|||
;
|
||||
; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
|
||||
; AVX512VBMI: # %bb.0:
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321]
|
||||
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
;
|
||||
|
@ -804,11 +800,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
|
|||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
|
||||
; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
|
||||
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257]
|
||||
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
|
||||
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
|
|
|
@ -2994,9 +2994,8 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_
|
|||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
; AVX512VLVBMI-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32,15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32]
|
||||
; AVX512VLVBMI-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
|
||||
; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
|
||||
; AVX512VLVBMI-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <32 x i8> %shuffle
|
||||
|
|
Loading…
Reference in New Issue