forked from OSchip/llvm-project
[DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks
During legalization we often create shuffles (via a build_vector scalarization stage) that have "any_extend_vector_inreg" style masks, as well as other masks that are the equivalent of a "truncate_vector_inreg" (if we had such a thing). This patch attempts to match these cases instead of just leaving shuffle lowering to handle them, which typically means we lose track of the undefined elements of the shuffles, resulting in an unnecessary extension+truncation stage for widened illegal types. The 2011-10-21-widen-cmp.ll regression will be fixed by making SIGN_EXTEND_VECTOR_IN_REG legal in SSE instead of lowering them to X86ISD::VSEXT (PR31712). Differential Revision: https://reviews.llvm.org/D29454 llvm-svn: 295451
This commit is contained in:
parent
7b35b78160
commit
511d788a95
|
@ -7530,6 +7530,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
|
|||
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
|
||||
}
|
||||
|
||||
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
|
||||
if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
|
||||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
|
||||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
|
||||
N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
|
||||
if (!LegalOperations ||
|
||||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
|
||||
return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
|
||||
}
|
||||
|
||||
// fold (sext_in_reg (zext x)) -> (sext x)
|
||||
// iff we are extending the source sign bit.
|
||||
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
|
||||
|
@ -14194,6 +14204,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
|
|||
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
|
||||
}
|
||||
|
||||
// Match shuffles that can be converted to any_vector_extend_in_reg.
|
||||
// This is often generated during legalization.
|
||||
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
|
||||
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
|
||||
SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
|
||||
SelectionDAG &DAG,
|
||||
const TargetLowering &TLI,
|
||||
bool LegalOperations) {
|
||||
EVT VT = SVN->getValueType(0);
|
||||
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
|
||||
|
||||
// TODO Add support for big-endian when we have a test case.
|
||||
if (!VT.isInteger() || IsBigEndian)
|
||||
return SDValue();
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned EltSizeInBits = VT.getScalarSizeInBits();
|
||||
ArrayRef<int> Mask = SVN->getMask();
|
||||
SDValue N0 = SVN->getOperand(0);
|
||||
|
||||
// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
|
||||
auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
if ((i % Scale) == 0 && Mask[i] == (i / Scale))
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
// Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
|
||||
// power-of-2 extensions as they are the most likely.
|
||||
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
|
||||
if (!isAnyExtend(Scale))
|
||||
continue;
|
||||
|
||||
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
|
||||
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
|
||||
if (!LegalOperations ||
|
||||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
|
||||
return DAG.getBitcast(VT,
|
||||
DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
|
||||
// each source element of a large type into the lowest elements of a smaller
|
||||
// destination type. This is often generated during legalization.
|
||||
// If the source node itself was a '*_extend_vector_inreg' node then we should
|
||||
// then be able to remove it.
|
||||
SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
|
||||
EVT VT = SVN->getValueType(0);
|
||||
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
|
||||
|
||||
// TODO Add support for big-endian when we have a test case.
|
||||
if (!VT.isInteger() || IsBigEndian)
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = SVN->getOperand(0);
|
||||
while (N0.getOpcode() == ISD::BITCAST)
|
||||
N0 = N0.getOperand(0);
|
||||
|
||||
unsigned Opcode = N0.getOpcode();
|
||||
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
|
||||
Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
|
||||
Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
|
||||
return SDValue();
|
||||
|
||||
SDValue N00 = N0.getOperand(0);
|
||||
ArrayRef<int> Mask = SVN->getMask();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned EltSizeInBits = VT.getScalarSizeInBits();
|
||||
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
|
||||
|
||||
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
|
||||
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
|
||||
// (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
|
||||
auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
if ((i * Scale) < NumElts && Mask[i] == (i * Scale))
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
// At the moment we just handle the case where we've truncated back to the
|
||||
// same size as before the extension.
|
||||
// TODO: handle more extension/truncation cases as cases arise.
|
||||
if (EltSizeInBits != ExtSrcSizeInBits)
|
||||
return SDValue();
|
||||
|
||||
// Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
|
||||
// power-of-2 truncations as they are the most likely.
|
||||
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
|
||||
if (isTruncate(Scale))
|
||||
return DAG.getBitcast(VT, N00);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
EVT VT = N->getValueType(0);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
@ -14298,6 +14415,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
|
||||
return S;
|
||||
|
||||
// Match shuffles that can be converted to any_extend_vector_inreg.
|
||||
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
|
||||
return V;
|
||||
|
||||
// Combine "truncate_vector_in_reg" style shuffles.
|
||||
if (SDValue V = combineTruncationShuffle(SVN, DAG))
|
||||
return V;
|
||||
|
||||
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
|
||||
Level < AfterLegalizeVectorOps &&
|
||||
(N1.isUndef() ||
|
||||
|
|
|
@ -9,7 +9,8 @@ define void @cmp_2_floats(<2 x float> %a, <2 x float> %b) {
|
|||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: movaps %xmm0, %xmm2
|
||||
; CHECK-NEXT: cmpordps %xmm0, %xmm0
|
||||
; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
|
||||
; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; CHECK-NEXT: pslld $31, %xmm0
|
||||
; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; CHECK-NEXT: movlps %xmm1, (%rax)
|
||||
|
|
|
@ -1075,7 +1075,6 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
|
|||
; NOVL: ## BB#0:
|
||||
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
|
||||
; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
|
||||
; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -1226,11 +1226,7 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
|
|||
; KNL-LABEL: test46:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
|
||||
; KNL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; KNL-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -1277,8 +1277,6 @@ define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y) {
|
|||
; STRICT-NEXT: movaps %xmm0, %xmm2
|
||||
; STRICT-NEXT: movaps %xmm1, %xmm0
|
||||
; STRICT-NEXT: cmpleps %xmm2, %xmm0
|
||||
; STRICT-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
|
||||
; STRICT-NEXT: pslld $31, %xmm0
|
||||
; STRICT-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movaps %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
|
@ -1297,8 +1295,6 @@ define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y) {
|
|||
; STRICT: # BB#0:
|
||||
; STRICT-NEXT: movaps %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmpleps %xmm1, %xmm0
|
||||
; STRICT-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
|
||||
; STRICT-NEXT: pslld $31, %xmm0
|
||||
; STRICT-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movaps %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue