[DAGCombiner] Add a combine to turn a build vector of zero extends of extract vector elts into a vector zero extend and possibly an extract subvector.
llvm-svn: 329509
commit 5b95eae1c3
parent 48f4d4f428
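For context, a minimal IR sketch of the kind of input this combine targets (hypothetical function name, not part of the patch): once the insertelement chain below reaches the DAG as a BUILD_VECTOR of zero extends of adjacent extract_vector_elts from the same source, the combine can replace the whole pattern with a single vector zero extend of %x.

; Hypothetical example, not from this commit: a <4 x i16> -> <4 x i32> widening
; expressed as scalar zexts of adjacent extracts from one vector.
define <4 x i32> @zext_build_vector(<4 x i16> %x) {
  %e0 = extractelement <4 x i16> %x, i32 0
  %e1 = extractelement <4 x i16> %x, i32 1
  %e2 = extractelement <4 x i16> %x, i32 2
  %e3 = extractelement <4 x i16> %x, i32 3
  %z0 = zext i16 %e0 to i32
  %z1 = zext i16 %e1 to i32
  %z2 = zext i16 %e2 to i32
  %z3 = zext i16 %e3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %z0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %z1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %z2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %z3, i32 3
  ret <4 x i32> %v3
}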
@@ -454,6 +454,7 @@ namespace {
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
@@ -14977,6 +14978,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  return Shuffles[0];
}

// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend or any extend?
// TODO: Allow undef elements?
// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue Op0 = N->getOperand(0);
  auto checkElem = [&](SDValue Op) -> int64_t {
    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

@@ -15036,6 +15085,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
                       Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

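The "possibly an extract subvector" case in the combine above covers inputs whose extracts use only a contiguous slice of a wider source vector. A hedged sketch of that shape (again a hypothetical function, not from the patch), where the intended DAG result is (zero_extend (extract_subvector %x, 0)):

; Hypothetical example, not from this commit: only the low two elements of the
; <4 x i32> source are zero extended, so the rewrite would also need an
; extract_subvector of the low half before the vector zero extend.
define <2 x i64> @zext_low_half(<4 x i32> %x) {
  %e0 = extractelement <4 x i32> %x, i32 0
  %e1 = extractelement <4 x i32> %x, i32 1
  %z0 = zext i32 %e0 to i64
  %z1 = zext i32 %e1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %z0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %z1, i32 1
  ret <2 x i64> %v1
}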
@@ -36,18 +36,14 @@ define float @g(<4 x i16>* nocapture %in) {
  ret float %3
}

; The backend generates for the following code an
; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to 4 x i16)))
;
; The and is not redundant and cannot be removed. Since
; extract_vector_elt is doing an implicit any_ext, the and
; is required to guarantee that the top bits are set to zero.

; Ideally should be a zext from <4 x i8> to <4 x 32>.
; Make sure we generate zext from <4 x i8> to <4 x 32>.

; CHECK-LABEL: h:
; CHECK: vld1.32
; CHECK: uxtb
; CHECK: vmovl.u8 q8, d16
; CHECK: vmovl.u16 q8, d16
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
define <4 x i32> @h(<4 x i8> *%in) {
  %1 = load <4 x i8>, <4 x i8>* %in, align 4
  %2 = extractelement <4 x i8> %1, i32 0
@@ -167,37 +167,34 @@ define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) {
;
; SSE42-LABEL: _mul4xi32toi64a:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm3, %xmm3
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
; SSE42-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; SSE42-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE42-NEXT: pmuludq %xmm0, %xmm1
; SSE42-NEXT: pmuludq %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; SSE42-NEXT: pmuludq %xmm3, %xmm2
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE42-NEXT: pmuludq %xmm4, %xmm0
; SSE42-NEXT: movdqa %xmm2, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: _mul4xi32toi64a:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _mul4xi32toi64a:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %f00 = extractelement <4 x i32> %0, i32 0