[DAGCombiner] Add a combine to turn a build vector of zero extends of extract vector elts into a vector zero extend and possibly an extract subvector.

llvm-svn: 329509
This commit is contained in:
Craig Topper 2018-04-07 19:09:50 +00:00
parent 48f4d4f428
commit 5b95eae1c3
3 changed files with 74 additions and 29 deletions

View File

@ -454,6 +454,7 @@ namespace {
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
@ -14977,6 +14978,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return Shuffles[0];
}
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
// TODO: Support sign extend or any extend?
// TODO: Allow undef elements?
// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
if (Op.getOpcode() == ISD::ZERO_EXTEND &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
return C->getZExtValue();
return -1;
};
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
int64_t Offset = checkElem(Op0);
if (Offset < 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
SDValue In = Op0.getOperand(0).getOperand(0);
EVT InSVT = In.getValueType().getScalarType();
EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
// Don't create an illegal input type after type legalization.
if (LegalTypes && !TLI.isTypeLegal(InVT))
return SDValue();
// Ensure all the elements come from the same vector and are adjacent.
for (unsigned i = 1; i != NumElems; ++i) {
if ((Offset + i) != checkElem(N->getOperand(i)))
return SDValue();
}
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@ -15036,6 +15085,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Op0.getOperand(0), Op0.getOperand(1));
}
if (SDValue V = convertBuildVecZextToZext(N))
return V;
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;

View File

@ -36,18 +36,14 @@ define float @g(<4 x i16>* nocapture %in) {
ret float %3
}
; The backend generates for the following code an
; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to 4 x i16)))
;
; The and is not redundant and cannot be removed. Since
; extract_vector_elt is doing an implicit any_ext, the and
; is required to guarantee that the top bits are set to zero.
; Ideally should be a zext from <4 x i8> to <4 x 32>.
; Make sure we generate zext from <4 x i8> to <4 x 32>.
; CHECK-LABEL: h:
; CHECK: vld1.32
; CHECK: uxtb
; CHECK: vmovl.u8 q8, d16
; CHECK: vmovl.u16 q8, d16
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
define <4 x i32> @h(<4 x i8> *%in) {
%1 = load <4 x i8>, <4 x i8>* %in, align 4
%2 = extractelement <4 x i8> %1, i32 0

View File

@ -167,37 +167,34 @@ define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) {
;
; SSE42-LABEL: _mul4xi32toi64a:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm3, %xmm3
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
; SSE42-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; SSE42-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE42-NEXT: pmuludq %xmm0, %xmm1
; SSE42-NEXT: pmuludq %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; SSE42-NEXT: pmuludq %xmm3, %xmm2
; SSE42-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE42-NEXT: pmuludq %xmm4, %xmm0
; SSE42-NEXT: movdqa %xmm2, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: _mul4xi32toi64a:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _mul4xi32toi64a:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%f00 = extractelement <4 x i32> %0, i32 0