Custom-lower arbitrary VECTOR_SHUFFLE nodes to VPERM.

TODO: Leave specific shuffles as VECTOR_SHUFFLE nodes and turn them into specialized operations such as vsplt*.

llvm-svn: 26887
2006-03-20 01:53:53 +00:00 · 2006-03-20 01:53:53 +00:00 · a8713b1ee6
parent 0a8b4eaee9
commit a8713b1ee6
3 changed files with 60 additions and 15 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -167,6 +167,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
+    
+    // FIXME: We don't support any BUILD_VECTOR's yet.  We should custom expand
+    // the ones we do, like splat(0.0) and splat(-0.0).
+    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
@ -179,11 +184,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
    setOperationAction(ISD::LOAD       , MVT::v4f32, Legal);
    setOperationAction(ISD::ADD        , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD       , MVT::v4i32, Legal);
-    // FIXME: We don't support any BUILD_VECTOR's yet.  We should custom expand
-    // the ones we do!
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
-    
+    setOperationAction(ISD::LOAD       , MVT::v16i8, Legal);
+
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
  }
@ -209,6 +214,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::LVE_X:         return "PPCISD::LVE_X";
+  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@ -566,6 +572,36 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
    return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx, 
                       DAG.getSrcValue(NULL));
  }
+  case ISD::VECTOR_SHUFFLE: {
+    // FIXME: Cases that are handled by instructions that take permute
+    // immediates (such as vsplt*) shouldn't be lowered here!  Also handle cases
+    // that are cheaper to do as multiple such instructions than as a constant
+    // pool load/vperm pair.
+    
+    // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+    // vector that will get spilled to the constant pool.
+    SDOperand V1 = Op.getOperand(0);
+    SDOperand V2 = Op.getOperand(1);
+    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+    SDOperand PermMask = Op.getOperand(2);
+    
+    // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
+    // that it is in input element units, not in bytes.  Convert now.
+    MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
+    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
+    
+    std::vector<SDOperand> ResultMask;
+    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
+      unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
+      
+      for (unsigned j = 0; j != BytesPerElement; ++j)
+        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+                                             MVT::i8));
+    }
+    
+    SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
+    return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
+  }
  }
  return SDOperand();
 }
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -56,6 +56,10 @@ namespace llvm {
      /// the third is the SRCVALUE node.
      LVE_X,
      
+      /// VPERM - The PPC VPERM Instruction.
+      ///
+      VPERM,
+      
      /// Hi/Lo - These represent the high and low 16-bit parts of a global
      /// address respectively.  These nodes have two operands, the first of
      /// which must be a TargetGlobalAddress, and the second of which must be a
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -26,6 +26,10 @@ def SDT_PPCShiftOp : SDTypeProfile<1, 2, [   // PPCshl, PPCsra, PPCsrl
 def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
 def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;

+def SDT_PPCvperm   : SDTypeProfile<1, 3, [
+  SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@ -46,6 +50,7 @@ def PPCvmaddfp  : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
 def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;

 def PPClve_x    : SDNode<"PPCISD::LVE_X", SDTLoad, [SDNPHasChain]>;
+def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;

 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
 // amounts.  These nodes are generated by the multi-precision shift code.
@ -118,15 +123,6 @@ def imm16Shifted : PatLeaf<(imm), [{
  return ((unsigned)N->getValue() & 0xFFFF0000U) == (unsigned)N->getValue();
 }], HI16>;

-/*
-// Example of a legalize expander: Only for PPC64.
-def : Expander<(set i64:$dst, (fp_to_sint f64:$src)),
-               [(set f64:$tmp , (FCTIDZ f64:$src)),
-                (set i32:$tmpFI, (CreateNewFrameIndex 8, 8)),
-                (store f64:$tmp, i32:$tmpFI),
-                (set i64:$dst, (load i32:$tmpFI))],
-                Subtarget_PPC64>;
-*/

 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
@ -956,7 +952,9 @@ def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
                       Requires<[FPContractions]>;

 def VPERM   : VAForm_1<43, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
-                       "vperm $vD, $vA, $vC, $vB", VecFP, []>;
+                       "vperm $vD, $vA, $vC, $vB", VecFP,
+                       [(set VRRC:$vD,
+                             (PPCvperm (v4f32 VRRC:$vA), VRRC:$vB, VRRC:$vC))]>;


 // VX-Form instructions.  AltiVec arithmetic ops.
@ -1153,6 +1151,13 @@ def : Pat<(f64 (extload xaddr:$src, f32)),

 def : Pat<(v4i32 (load xoaddr:$src)),
          (v4i32 (LVX xoaddr:$src))>;
+def : Pat<(v16i8 (load xoaddr:$src)),
+          (v16i8 (LVX xoaddr:$src))>;
+
+
+def : Pat<(PPCvperm (v4i32 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+          (v4i32 (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC))>;
+
 def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
          (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
 def : Pat<(v4i32 (PPClve_x xoaddr:$src)),