forked from OSchip/llvm-project
[X86][AVX512] Adding a pattern for broadcastm intrinsic.
Differential Revision: https://reviews.llvm.org/D38312 Change-Id: I6551fb13879e098aed74de410e29815cf37d9ab5 llvm-svn: 316890
This commit is contained in:
parent
390fc57771
commit
70280f9a0d
|
@ -6687,6 +6687,44 @@ static bool isUseOfShuffle(SDNode *N) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if the current node of build vector is a zero extended vector.
|
||||||
|
// If so, return the value extended.
|
||||||
|
// For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
|
||||||
|
// NumElt - return the number of zero extended identical values.
|
||||||
|
// EltType - return the type of the value include the zero extend.
|
||||||
|
static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
|
||||||
|
unsigned &NumElt, MVT &EltType) {
|
||||||
|
SDValue ExtValue = Op->getOperand(0);
|
||||||
|
unsigned NumElts = Op->getNumOperands();
|
||||||
|
unsigned Delta = NumElts;
|
||||||
|
|
||||||
|
for (unsigned i = 1; i < NumElts; i++) {
|
||||||
|
if (Op->getOperand(i) == ExtValue) {
|
||||||
|
Delta = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
if (!isPowerOf2_32(Delta) || Delta == 1)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
for (unsigned i = Delta; i < NumElts; i++) {
|
||||||
|
if (i % Delta == 0) {
|
||||||
|
if (Op->getOperand(i) != ExtValue)
|
||||||
|
return SDValue();
|
||||||
|
} else if (!(isNullConstant(Op->getOperand(i)) ||
|
||||||
|
Op->getOperand(i).isUndef()))
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
unsigned EltSize =
|
||||||
|
Op->getSimpleValueType(0).getScalarSizeInBits();
|
||||||
|
unsigned ExtVTSize = EltSize * Delta;
|
||||||
|
EltType = MVT::getIntegerVT(ExtVTSize);
|
||||||
|
NumElt = NumElts / Delta;
|
||||||
|
return ExtValue;
|
||||||
|
}
|
||||||
|
|
||||||
/// Attempt to use the vbroadcast instruction to generate a splat value
|
/// Attempt to use the vbroadcast instruction to generate a splat value
|
||||||
/// from a splat BUILD_VECTOR which uses:
|
/// from a splat BUILD_VECTOR which uses:
|
||||||
/// a. A single scalar load, or a constant.
|
/// a. A single scalar load, or a constant.
|
||||||
|
@ -6709,6 +6747,32 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
|
||||||
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
|
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
|
||||||
"Unsupported vector type for broadcast.");
|
"Unsupported vector type for broadcast.");
|
||||||
|
|
||||||
|
// Attempt to use VBROADCASTM
|
||||||
|
// From this paterrn:
|
||||||
|
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
|
||||||
|
// b. t1 = (build_vector t0 t0)
|
||||||
|
//
|
||||||
|
// Create (VBROADCASTM v2i1 X)
|
||||||
|
if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
|
||||||
|
MVT EltType;
|
||||||
|
unsigned NumElts;
|
||||||
|
SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
|
||||||
|
if (ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) {
|
||||||
|
SDValue BOperand = ZeroExtended.getOperand(0);
|
||||||
|
if (BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
|
||||||
|
if ((EltType == MVT::i64 &&
|
||||||
|
VT.getVectorElementType() == MVT::i8) || // for broadcastmb2q
|
||||||
|
(EltType == MVT::i32 &&
|
||||||
|
VT.getVectorElementType() == MVT::i16)) { // for broadcastmw2d
|
||||||
|
SDValue Brdcst =
|
||||||
|
DAG.getNode(X86ISD::VBROADCASTM, dl,
|
||||||
|
MVT::getVectorVT(EltType, NumElts), BOperand);
|
||||||
|
return DAG.getBitcast(VT, Brdcst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
BitVector UndefElements;
|
BitVector UndefElements;
|
||||||
SDValue Ld = BVOp->getSplatValue(&UndefElements);
|
SDValue Ld = BVOp->getSplatValue(&UndefElements);
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,7 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm_epi64:
|
; AVX512VLCDBW-LABEL: test_mm_epi64:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %xmm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
|
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
|
||||||
|
@ -62,23 +59,13 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm_epi32:
|
; AVX512VLCDBW-LABEL: test_mm_epi32:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
|
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
|
||||||
; X86-AVX512VLCDBW: # BB#0: # %entry
|
; X86-AVX512VLCDBW: # BB#0: # %entry
|
||||||
; X86-AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
; X86-AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
||||||
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
|
||||||
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: retl
|
; X86-AVX512VLCDBW-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
%0 = icmp eq <16 x i8> %a, %b
|
%0 = icmp eq <16 x i8> %a, %b
|
||||||
|
@ -100,27 +87,13 @@ define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm512_epi32:
|
; AVX512VLCDBW-LABEL: test_mm512_epi32:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
|
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
|
||||||
; X86-AVX512VLCDBW: # BB#0: # %entry
|
; X86-AVX512VLCDBW: # BB#0: # %entry
|
||||||
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||||
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
|
||||||
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: retl
|
; X86-AVX512VLCDBW-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
%0 = icmp eq <16 x i32> %a, %b
|
%0 = icmp eq <16 x i32> %a, %b
|
||||||
|
@ -145,12 +118,7 @@ define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm512_epi64:
|
; AVX512VLCDBW-LABEL: test_mm512_epi64:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
|
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
|
||||||
|
@ -188,11 +156,7 @@ define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm256_epi64:
|
; AVX512VLCDBW-LABEL: test_mm256_epi64:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %ymm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
|
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
|
||||||
|
@ -232,25 +196,13 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
|
||||||
; AVX512VLCDBW-LABEL: test_mm256_epi32:
|
; AVX512VLCDBW-LABEL: test_mm256_epi32:
|
||||||
; AVX512VLCDBW: # BB#0: # %entry
|
; AVX512VLCDBW: # BB#0: # %entry
|
||||||
; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
|
; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
|
||||||
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
|
||||||
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX512VLCDBW-NEXT: retq
|
; AVX512VLCDBW-NEXT: retq
|
||||||
;
|
;
|
||||||
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
|
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
|
||||||
; X86-AVX512VLCDBW: # BB#0: # %entry
|
; X86-AVX512VLCDBW: # BB#0: # %entry
|
||||||
; X86-AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
|
; X86-AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
|
||||||
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
|
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
|
||||||
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; X86-AVX512VLCDBW-NEXT: retl
|
; X86-AVX512VLCDBW-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
%0 = icmp eq <16 x i16> %a, %b
|
%0 = icmp eq <16 x i16> %a, %b
|
||||||
|
|
Loading…
Reference in New Issue