From a9a1313386f65f9380ea16c20c63aaa832027456 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 20 Mar 2006 06:51:10 +0000
Subject: [PATCH] Add support for generating vspltw, instead of a vperm
 instruction with a constant pool load.  This generates significantly nicer
 code for splats.

Once the tblgen bugs are fixed, the custom selection code can be removed.

llvm-svn: 26898
---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 16 ++++++++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 28 ++++++++++++++-------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td     | 10 +++++---
 3 files changed, 41 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 7ddf8c0104dd..39f544692687 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -927,6 +927,22 @@ void PPCDAGToDAGISel::Select(SDOperand &Result, SDOperand Op) {
   
   switch (N->getOpcode()) {
   default: break;
+  case ISD::VECTOR_SHUFFLE:
+    // FIXME: This should be autogenerated from the .td file; it is here for now
+    // due to bugs in tblgen.
+    if (Op.getOperand(1).getOpcode() == ISD::UNDEF &&
+        (Op.getValueType() == MVT::v4f32 || Op.getValueType() == MVT::v4i32) &&
+        PPC::isSplatShuffleMask(Op.getOperand(2).Val)) {
+      SDOperand N0;
+      Select(N0, N->getOperand(0));
+
+      Result = CodeGenMap[Op] = 
+        SDOperand(CurDAG->getTargetNode(PPC::VSPLTW, MVT::v4f32,
+                      getI32Imm(PPC::getVSPLTImmediate(Op.getOperand(2).Val)),
+                                        N0), 0);
+      return;
+    }
+    assert(0 && "ILLEGAL VECTOR_SHUFFLE!");
   case ISD::SETCC:
     Result = SelectSETCC(Op);
     return;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index eeed0dfe742a..ee41ed13b464 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -245,6 +245,12 @@ static bool isFloatingPointZero(SDOperand Op) {
 /// VSPLTB/VSPLTH/VSPLTW.
 bool PPC::isSplatShuffleMask(SDNode *N) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
+  
+  // We can only splat 8-bit, 16-bit, and 32-bit quantities.
+  if (N->getNumOperands() != 4 && N->getNumOperands() != 8 &&
+      N->getNumOperands() != 16)
+    return false;
+  
   // This is a splat operation if each element of the permute is the same, and
   // if the value doesn't reference the second vector.
   SDOperand Elt = N->getOperand(0);
@@ -263,11 +269,10 @@ bool PPC::isSplatShuffleMask(SDNode *N) {
 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 unsigned PPC::getVSPLTImmediate(SDNode *N) {
   assert(isSplatShuffleMask(N));
-  return cast<ConstantSDNode>(N)->getValue();
+  return cast<ConstantSDNode>(N->getOperand(0))->getValue();
 }
 
 
-
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -602,17 +607,22 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
                        DAG.getSrcValue(NULL));
   }
   case ISD::VECTOR_SHUFFLE: {
-    // FIXME: Cases that are handled by instructions that take permute
-    // immediates (such as vsplt*) shouldn't be lowered here!  Also handle cases
-    // that are cheaper to do as multiple such instructions than as a constant
-    // pool load/vperm pair.
+    SDOperand V1 = Op.getOperand(0);
+    SDOperand V2 = Op.getOperand(1);
+    SDOperand PermMask = Op.getOperand(2);
+    
+    // Cases that are handled by instructions that take permute immediates
+    // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+    // selected by the instruction selector.
+    if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF)
+      break;
+    
+    // TODO: Handle more cases, and also handle cases that are cheaper to do as
+    // multiple such instructions than as a constant pool load/vperm pair.
     
     // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
     // vector that will get spilled to the constant pool.
-    SDOperand V1 = Op.getOperand(0);
-    SDOperand V2 = Op.getOperand(1);
     if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-    SDOperand PermMask = Op.getOperand(2);
     
     // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
     // that it is in input element units, not in bytes.  Convert now.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 2e63119c9c8f..66e89dc09dcd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1032,10 +1032,12 @@ def VSPLTH : VXForm_1<588, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
                       "vsplth $vD, $vB, $UIMM", VecPerm,
                       []>;
                       
-//def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
-//                      "vspltw $vD, $vB, $UIMM", VecPerm,
-//                      [(set VRRC:$vD, (vector_shuffle (v4f32 VRRC:$vB), (undef),
-//                                      VSPLT_shuffle_mask:$UIMM))]>;
+def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+                      "vspltw $vD, $vB, $UIMM", VecPerm,
+                      [/*
+                       (set VRRC:$vD, (vector_shuffle (v4f32 VRRC:$vB), (undef),
+                                      VSPLT_shuffle_mask:$UIMM))*/]>;
+                      // FIXME: ALSO ADD SUPPORT FOR v4i32!
                       
 // VX-Form Pseudo Instructions