implement SplitVecOp_CONCAT_VECTORS, fixing the included testcase with SSE1.

llvm-svn: 112171
2010-08-26 05:51:22 +00:00 · 2010-08-26 05:51:22 +00:00 · eb2cc0ce0e
parent 4cec44975e
commit eb2cc0ce0e
4 changed files with 100 additions and 68 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -581,6 +581,7 @@ private:
  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);

  //===--------------------------------------------------------------------===//
  // Vector Widening Support: LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
    case ISD::BIT_CONVERT:       Res = SplitVecOp_BIT_CONVERT(N); break;
    case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
    case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+    case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
    case ISD::STORE:
      Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
      break;
@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
      return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
    return SDValue(DAG.UpdateNodeOperands(N, Hi,
                                  DAG.getConstant(IdxVal - LoElts,
-                                                  Idx.getValueType())),
-                   0);
+                                                  Idx.getValueType())), 0);
  }

  // Store the vector to the stack.
@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
  assert(N->isUnindexed() && "Indexed store of vector?");
  assert(OpNo == 1 && "Can only split the stored value");
-  DebugLoc dl = N->getDebugLoc();
+  DebugLoc DL = N->getDebugLoc();

  bool isTruncating = N->isTruncatingStore();
  SDValue Ch  = N->getChain();
@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;

  if (isTruncating)
-    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
                           LoMemVT, isVol, isNT, Alignment);
  else
-    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
                      isVol, isNT, Alignment);

  // Increment the pointer to the other half.
-  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                    DAG.getIntPtrConstant(IncrementSize));
  SVOffset += IncrementSize;

  if (isTruncating)
-    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
                           HiMemVT, isVol, isNT, Alignment);
  else
-    Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+    Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
                      isVol, isNT, Alignment);

-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+  DebugLoc DL = N->getDebugLoc();
+  
+  // The input operands all must have the same type, and we know the result
+  // type is valid.  Convert this to a BUILD_VECTOR which extracts all the
+  // input elements.
+  // TODO: If the input elements are power-of-two vectors, we could convert this
+  // to a new CONCAT_VECTORS node with elements that are half-wide.
+  SmallVector<SDValue, 32> Elts;
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+    SDValue Op = N->getOperand(op);
+    for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+         i != e; ++i) {  // NB: this 'e' shadows the outer operand count.
+      Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+                                 Op, DAG.getIntPtrConstant(i)));
+
+    }
+  }
+  
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+                     &Elts[0], Elts.size());
 }


@ -2223,8 +2247,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,

  // Check if we can load the element with one instruction
  if (LdWidth <= NewVTWidth) {
-    if (NewVT.isVector()) {
-      if (NewVT != WidenVT) {
+    if (!NewVT.isVector()) {
+      unsigned NumElts = WidenWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+    }
+    if (NewVT == WidenVT)
+      return LdOp;
+
    assert(WidenWidth % NewVTWidth == 0);
    unsigned NumConcat = WidenWidth / NewVTWidth;
    SmallVector<SDValue, 16> ConcatOps(NumConcat);
@ -2234,14 +2265,6 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
      ConcatOps[i] = UndefVal;
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
                       NumConcat);
-      } else
-        return LdOp;
-    } else {
-      unsigned NumElts = WidenWidth / NewVTWidth;
-      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
-      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
-    }
  }

  // Load vector by using multiple loads from largest vector to scalar
@ -2274,7 +2297,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,

  // Build the vector from the loads operations
  unsigned End = LdOps.size();
-  if (LdOps[0].getValueType().isVector()) {
+  if (!LdOps[0].getValueType().isVector())
+    // All the loads are scalar loads.
+    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+  
  // If the load contains vectors, build the vector using concat vector.
  // All of the vectors used to loads are power of 2 and the scalars load
  // can be combined to make a power of 2 vector.
@ -2304,22 +2330,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
    ConcatOps[--Idx] = LdOps[i];
  }

-    if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
+  if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+                       &ConcatOps[Idx], End - Idx);
+
  // We need to fill the rest with undefs to build the vector
  unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
  SmallVector<SDValue, 16> WidenOps(NumOps);
  SDValue UndefVal = DAG.getUNDEF(LdTy);
+  {
    unsigned i = 0;
    for (; i != End-Idx; ++i)
      WidenOps[i] = ConcatOps[Idx+i];
    for (; i != NumOps; ++i)
      WidenOps[i] = UndefVal;
+  }
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
-    } else
-      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
-                         &ConcatOps[Idx], End - Idx);
-  } else // All the loads are scalar loads.
-    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
 }

 SDValue
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -1320,9 +1320,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
    // llvm-gcc has never done it right and no one has noticed, so this
    // should be OK for now.
    if (ValVT == MVT::f64 &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE2())) {
+        (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
      report_fatal_error("SSE2 register return with SSE2 disabled");
-    }

    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
--- a/llvm/test/CodeGen/X86/sse1.ll
+++ b/llvm/test/CodeGen/X86/sse1.ll
@ -6,3 +6,9 @@ define <8 x i16> @test1(<8 x i32> %a) nounwind {
 ; CHECK: test1
  ret <8 x i16> zeroinitializer
 }
+
+define <8 x i16> @test2(<8 x i32> %a) nounwind {
+; CHECK: test2
+  %c = trunc <8 x i32> %a to <8 x i16>            ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %c
+}