From a9a1313386f65f9380ea16c20c63aaa832027456 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Mar 2006 06:51:10 +0000 Subject: [PATCH] Add support for generating vspltw, instead of a vperm instruction with a constant pool load. This generates significantly nicer code for splats. When tblgen gets bugfixed, we can remove the custom selection code. llvm-svn: 26898 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 16 ++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 28 ++++++++++++++------- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 10 +++++--- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 7ddf8c0104dd..39f544692687 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -927,6 +927,22 @@ void PPCDAGToDAGISel::Select(SDOperand &Result, SDOperand Op) { switch (N->getOpcode()) { default: break; + case ISD::VECTOR_SHUFFLE: + // FIXME: This should be autogenerated from the .td file, it is here for now + // due to bugs in tblgen. + if (Op.getOperand(1).getOpcode() == ISD::UNDEF && + (Op.getValueType() == MVT::v4f32 || Op.getValueType() == MVT::v4i32) && + PPC::isSplatShuffleMask(Op.getOperand(2).Val)) { + SDOperand N0; + Select(N0, N->getOperand(0)); + + Result = CodeGenMap[Op] = + SDOperand(CurDAG->getTargetNode(PPC::VSPLTW, MVT::v4f32, + getI32Imm(PPC::getVSPLTImmediate(Op.getOperand(2).Val)), + N0), 0); + return; + } + assert(0 && "ILLEGAL VECTOR_SHUFFLE!"); case ISD::SETCC: Result = SelectSETCC(Op); return; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index eeed0dfe742a..ee41ed13b464 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -245,6 +245,12 @@ static bool isFloatingPointZero(SDOperand Op) { /// VSPLTB/VSPLTH/VSPLTW. 
bool PPC::isSplatShuffleMask(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // We can only splat 8-bit, 16-bit, and 32-bit quantities. + if (N->getNumOperands() != 4 && N->getNumOperands() != 8 && + N->getNumOperands() != 16) + return false; + // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. SDOperand Elt = N->getOperand(0); @@ -263,11 +269,10 @@ bool PPC::isSplatShuffleMask(SDNode *N) { /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N) { assert(isSplatShuffleMask(N)); - return cast<ConstantSDNode>(N)->getValue(); + return cast<ConstantSDNode>(N->getOperand(0))->getValue(); } - /// LowerOperation - Provide custom lowering hooks for some operations. /// SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -602,17 +607,22 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { DAG.getSrcValue(NULL)); } case ISD::VECTOR_SHUFFLE: { - // FIXME: Cases that are handled by instructions that take permute - // immediates (such as vsplt*) shouldn't be lowered here! Also handle cases - // that are cheaper to do as multiple such instructions than as a constant - // pool load/vperm pair. + SDOperand V1 = Op.getOperand(0); + SDOperand V2 = Op.getOperand(1); + SDOperand PermMask = Op.getOperand(2); + + // Cases that are handled by instructions that take permute immediates + // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be + // selected by the instruction selector. + if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF) + break; + + // TODO: Handle more cases, and also handle cases that are cheaper to do as + // multiple such instructions than as a constant pool load/vperm pair. // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool.
- SDOperand V1 = Op.getOperand(0); - SDOperand V2 = Op.getOperand(1); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; - SDOperand PermMask = Op.getOperand(2); // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 2e63119c9c8f..66e89dc09dcd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1032,10 +1032,12 @@ def VSPLTH : VXForm_1<588, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, []>; -//def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB), -// "vspltw $vD, $vB, $UIMM", VecPerm, -// [(set VRRC:$vD, (vector_shuffle (v4f32 VRRC:$vB), (undef), -// VSPLT_shuffle_mask:$UIMM))]>; +def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB), + "vspltw $vD, $vB, $UIMM", VecPerm, + [/* + (set VRRC:$vD, (vector_shuffle (v4f32 VRRC:$vB), (undef), + VSPLT_shuffle_mask:$UIMM))*/]>; + // FIXME: ALSO ADD SUPPORT FOR v4i32! // VX-Form Pseudo Instructions