forked from OSchip/llvm-project
[X86] SimplifyDemandedVectorEltsForTargetNode - merge shuffle/pack lower demanded elements handling.
Generalize the vector operand extraction code for shuffle/pack ops - we can assume that the vector operands are the same width as the result, and any non-vector values can be reused directly in the smaller width op.
This commit is contained in:
parent
d698ff92a5
commit
393b4bd136
|
@ -37053,18 +37053,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
ExtSizeInBits = SizeInBits / 4;
|
||||
|
||||
switch (Opc) {
|
||||
// Zero upper elements.
|
||||
case X86ISD::VZEXT_MOVL: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp =
|
||||
TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
// Subvector broadcast.
|
||||
case X86ISD::SUBV_BROADCAST: {
|
||||
SDLoc DL(Op);
|
||||
|
@ -37080,11 +37068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
|
||||
TLO.DAG, DL, ExtSizeInBits));
|
||||
}
|
||||
// Target unary shuffles by immediate:
|
||||
case X86ISD::PSHUFD:
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::PSHUFHW:
|
||||
case X86ISD::VPERMILPI:
|
||||
// Byte shifts by immediate.
|
||||
case X86ISD::VSHLDQ:
|
||||
case X86ISD::VSRLDQ:
|
||||
|
@ -37122,11 +37105,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
}
|
||||
break;
|
||||
}
|
||||
// Zero upper elements.
|
||||
case X86ISD::VZEXT_MOVL:
|
||||
// Target unary shuffles by immediate:
|
||||
case X86ISD::PSHUFD:
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::PSHUFHW:
|
||||
case X86ISD::VPERMILPI:
|
||||
// (Non-Lane Crossing) Target Shuffles.
|
||||
case X86ISD::VPERMILPV:
|
||||
case X86ISD::VPERMIL2:
|
||||
case X86ISD::PSHUFB:
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::UNPCKH:
|
||||
case X86ISD::BLENDI:
|
||||
// Saturated Packs.
|
||||
case X86ISD::PACKSS:
|
||||
case X86ISD::PACKUS:
|
||||
|
@ -37136,49 +37128,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
case X86ISD::FHADD:
|
||||
case X86ISD::FHSUB: {
|
||||
SDLoc DL(Op);
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
|
||||
SDValue SrcOp = Op.getOperand(i);
|
||||
EVT SrcVT = SrcOp.getValueType();
|
||||
assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) &&
|
||||
"Unsupported vector size");
|
||||
Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL,
|
||||
ExtSizeInBits)
|
||||
: SrcOp);
|
||||
}
|
||||
MVT ExtVT = VT.getSimpleVT();
|
||||
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
|
||||
ExtSizeInBits / ExtVT.getScalarSizeInBits());
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext1 =
|
||||
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
// Vector blend by immediate.
|
||||
case X86ISD::BLENDI: {
|
||||
SDLoc DL(Op);
|
||||
MVT ExtVT = VT.getSimpleVT();
|
||||
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
|
||||
ExtSizeInBits / ExtVT.getScalarSizeInBits());
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext1 =
|
||||
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp =
|
||||
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2));
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
case X86ISD::VPERMIL2: {
|
||||
SDLoc DL(Op);
|
||||
MVT ExtVT = VT.getSimpleVT();
|
||||
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
|
||||
ExtSizeInBits / ExtVT.getScalarSizeInBits());
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext1 =
|
||||
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext2 =
|
||||
extractSubVector(Op.getOperand(2), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp =
|
||||
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Ext2, Op.getOperand(3));
|
||||
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
|
|
Loading…
Reference in New Issue