[X86] SimplifyDemandedVectorEltsForTargetNode - merge shuffle/pack lower demanded elements handling.

Generalize the vector operand extraction code for shuffle/pack ops. We can assume that the vector operands are the same width as the result, and that any non-vector operands can be reused directly in the smaller-width op.
This commit is contained in:
Simon Pilgrim 2020-06-27 19:10:13 +01:00
parent d698ff92a5
commit 393b4bd136
1 changed files with 20 additions and 57 deletions

View File

@ -37053,18 +37053,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
ExtSizeInBits = SizeInBits / 4; ExtSizeInBits = SizeInBits / 4;
switch (Opc) { switch (Opc) {
// Zero upper elements.
case X86ISD::VZEXT_MOVL: {
SDLoc DL(Op);
SDValue Ext0 =
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue ExtOp =
TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
// Subvector broadcast. // Subvector broadcast.
case X86ISD::SUBV_BROADCAST: { case X86ISD::SUBV_BROADCAST: {
SDLoc DL(Op); SDLoc DL(Op);
@ -37080,11 +37068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
TLO.DAG, DL, ExtSizeInBits)); TLO.DAG, DL, ExtSizeInBits));
} }
// Target unary shuffles by immediate:
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
case X86ISD::VPERMILPI:
// Byte shifts by immediate. // Byte shifts by immediate.
case X86ISD::VSHLDQ: case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ: case X86ISD::VSRLDQ:
@ -37122,11 +37105,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
} }
break; break;
} }
// Zero upper elements.
case X86ISD::VZEXT_MOVL:
// Target unary shuffles by immediate:
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
case X86ISD::VPERMILPI:
// (Non-Lane Crossing) Target Shuffles. // (Non-Lane Crossing) Target Shuffles.
case X86ISD::VPERMILPV: case X86ISD::VPERMILPV:
case X86ISD::VPERMIL2:
case X86ISD::PSHUFB: case X86ISD::PSHUFB:
case X86ISD::UNPCKL: case X86ISD::UNPCKL:
case X86ISD::UNPCKH: case X86ISD::UNPCKH:
case X86ISD::BLENDI:
// Saturated Packs. // Saturated Packs.
case X86ISD::PACKSS: case X86ISD::PACKSS:
case X86ISD::PACKUS: case X86ISD::PACKUS:
@ -37136,49 +37128,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::FHADD: case X86ISD::FHADD:
case X86ISD::FHSUB: { case X86ISD::FHSUB: {
SDLoc DL(Op); SDLoc DL(Op);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
SDValue SrcOp = Op.getOperand(i);
EVT SrcVT = SrcOp.getValueType();
assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) &&
"Unsupported vector size");
Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL,
ExtSizeInBits)
: SrcOp);
}
MVT ExtVT = VT.getSimpleVT(); MVT ExtVT = VT.getSimpleVT();
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
ExtSizeInBits / ExtVT.getScalarSizeInBits()); ExtSizeInBits / ExtVT.getScalarSizeInBits());
SDValue Ext0 = SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue Ext1 =
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
// Vector blend by immediate.
case X86ISD::BLENDI: {
SDLoc DL(Op);
MVT ExtVT = VT.getSimpleVT();
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
ExtSizeInBits / ExtVT.getScalarSizeInBits());
SDValue Ext0 =
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue Ext1 =
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue ExtOp =
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2));
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
case X86ISD::VPERMIL2: {
SDLoc DL(Op);
MVT ExtVT = VT.getSimpleVT();
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
ExtSizeInBits / ExtVT.getScalarSizeInBits());
SDValue Ext0 =
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue Ext1 =
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue Ext2 =
extractSubVector(Op.getOperand(2), 0, TLO.DAG, DL, ExtSizeInBits);
SDValue ExtOp =
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Ext2, Op.getOperand(3));
SDValue UndefVec = TLO.DAG.getUNDEF(VT); SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert = SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);