forked from OSchip/llvm-project
Custom lower arbitrary VECTOR_SHUFFLE's to VPERM.
TODO: leave specific ones as VECTOR_SHUFFLE's and turn them into specialized operations like vsplt*.
llvm-svn: 26887
This commit is contained in:
parent
0a8b4eaee9
commit
a8713b1ee6
|
@ -167,6 +167,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
|
||||
|
||||
// FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
|
||||
// the ones we do, like splat(0.0) and splat(-0.0).
|
||||
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
|
||||
}
|
||||
|
||||
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
|
||||
|
@ -179,11 +184,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::LOAD , MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::ADD , MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
|
||||
// FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
|
||||
// the ones we do!
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
|
||||
|
||||
setOperationAction(ISD::LOAD , MVT::v16i8, Legal);
|
||||
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
|
||||
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
|
||||
}
|
||||
|
@ -209,6 +214,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
||||
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
||||
case PPCISD::LVE_X: return "PPCISD::LVE_X";
|
||||
case PPCISD::VPERM: return "PPCISD::VPERM";
|
||||
case PPCISD::Hi: return "PPCISD::Hi";
|
||||
case PPCISD::Lo: return "PPCISD::Lo";
|
||||
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
||||
|
@ -566,6 +572,36 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||
return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
|
||||
DAG.getSrcValue(NULL));
|
||||
}
|
||||
case ISD::VECTOR_SHUFFLE: {
|
||||
// FIXME: Cases that are handled by instructions that take permute
|
||||
// immediates (such as vsplt*) shouldn't be lowered here! Also handle cases
|
||||
// that are cheaper to do as multiple such instructions than as a constant
|
||||
// pool load/vperm pair.
|
||||
|
||||
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
|
||||
// vector that will get spilled to the constant pool.
|
||||
SDOperand V1 = Op.getOperand(0);
|
||||
SDOperand V2 = Op.getOperand(1);
|
||||
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
|
||||
SDOperand PermMask = Op.getOperand(2);
|
||||
|
||||
// The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
|
||||
// that it is in input element units, not in bytes. Convert now.
|
||||
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
|
||||
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
|
||||
|
||||
std::vector<SDOperand> ResultMask;
|
||||
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
|
||||
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
|
||||
|
||||
for (unsigned j = 0; j != BytesPerElement; ++j)
|
||||
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
||||
MVT::i8));
|
||||
}
|
||||
|
||||
SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
|
||||
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
|
||||
}
|
||||
}
|
||||
return SDOperand();
|
||||
}
|
||||
|
|
|
@ -56,6 +56,10 @@ namespace llvm {
|
|||
/// the third is the SRCVALUE node.
|
||||
LVE_X,
|
||||
|
||||
/// VPERM - The PPC VPERM Instruction.
|
||||
///
|
||||
VPERM,
|
||||
|
||||
/// Hi/Lo - These represent the high and low 16-bit parts of a global
|
||||
/// address respectively. These nodes have two operands, the first of
|
||||
/// which must be a TargetGlobalAddress, and the second of which must be a
|
||||
|
|
|
@ -26,6 +26,10 @@ def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
|
|||
def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
|
||||
def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
|
||||
|
||||
def SDT_PPCvperm : SDTypeProfile<1, 3, [
|
||||
SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific DAG Nodes.
|
||||
//
|
||||
|
@ -46,6 +50,7 @@ def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
|
|||
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
|
||||
|
||||
def PPClve_x : SDNode<"PPCISD::LVE_X", SDTLoad, [SDNPHasChain]>;
|
||||
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
|
||||
|
||||
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
|
||||
// amounts. These nodes are generated by the multi-precision shift code.
|
||||
|
@ -118,15 +123,6 @@ def imm16Shifted : PatLeaf<(imm), [{
|
|||
return ((unsigned)N->getValue() & 0xFFFF0000U) == (unsigned)N->getValue();
|
||||
}], HI16>;
|
||||
|
||||
/*
|
||||
// Example of a legalize expander: Only for PPC64.
|
||||
def : Expander<(set i64:$dst, (fp_to_sint f64:$src)),
|
||||
[(set f64:$tmp , (FCTIDZ f64:$src)),
|
||||
(set i32:$tmpFI, (CreateNewFrameIndex 8, 8)),
|
||||
(store f64:$tmp, i32:$tmpFI),
|
||||
(set i64:$dst, (load i32:$tmpFI))],
|
||||
Subtarget_PPC64>;
|
||||
*/
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Flag Definitions.
|
||||
|
@ -956,7 +952,9 @@ def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
|||
Requires<[FPContractions]>;
|
||||
|
||||
def VPERM : VAForm_1<43, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
|
||||
"vperm $vD, $vA, $vC, $vB", VecFP, []>;
|
||||
"vperm $vD, $vA, $vC, $vB", VecFP,
|
||||
[(set VRRC:$vD,
|
||||
(PPCvperm (v4f32 VRRC:$vA), VRRC:$vB, VRRC:$vC))]>;
|
||||
|
||||
|
||||
// VX-Form instructions. AltiVec arithmetic ops.
|
||||
|
@ -1153,6 +1151,13 @@ def : Pat<(f64 (extload xaddr:$src, f32)),
|
|||
|
||||
def : Pat<(v4i32 (load xoaddr:$src)),
|
||||
(v4i32 (LVX xoaddr:$src))>;
|
||||
def : Pat<(v16i8 (load xoaddr:$src)),
|
||||
(v16i8 (LVX xoaddr:$src))>;
|
||||
|
||||
|
||||
def : Pat<(PPCvperm (v4i32 VRRC:$vA), VRRC:$vB, VRRC:$vC),
|
||||
(v4i32 (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC))>;
|
||||
|
||||
def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
|
||||
(STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
|
||||
def : Pat<(v4i32 (PPClve_x xoaddr:$src)),
|
||||
|
|
Loading…
Reference in New Issue