Reland "[SelectionDAG] Enable target specific vector scalarization of calls and returns"

By target hookifying getRegisterType, getNumRegisters, getVectorBreakdown, backends can request that LLVM to scalarize vector types for calls and returns. The MIPS vector ABI requires that vector arguments and returns are passed in integer registers. With SelectionDAG's new hooks, the MIPS backend can now handle LLVM-IR with vector types in calls and returns. E.g. 'call @foo(<4 x i32> %4)'. Previously these cases would be scalarized for the MIPS O32/N32/N64 ABI for calls and returns if vector types were not legal. If vector types were legal, a single 128bit vector argument would be assigned to a single 32 bit / 64 bit integer register. By teaching the MIPS backend to inspect the original types, it can now implement the MIPS vector ABI which requires a particular method of scalarizing vectors. Previously, the MIPS backend relied on clang to scalarize types such as "call @foo(<4 x float> %a) into "call @foo(i32 inreg %1, i32 inreg %2, i32 inreg %3, i32 inreg %4)". This patch enables the MIPS backend to take either form for vector types. The previous version of this patch had a "conditional move or jump depends on uninitialized value". Reviewers: zoran.jovanovic, jaydeep, vkalintiris, slthakur Differential Revision: https://reviews.llvm.org/D27845 llvm-svn: 305083
2017-06-09 14:37:08 +00:00 · 2017-06-09 14:37:08 +00:00 · 212cccb2f4
parent a1cee29608
commit 212cccb2f4
15 changed files with 2116 additions and 104 deletions
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@ -677,6 +677,16 @@ public:
                                  unsigned &NumIntermediates,
                                  MVT &RegisterVT) const;

+  /// Certain targets such as MIPS require that some types such as vectors are
+  /// always broken down into scalars in some contexts. This occurs even if the
+  /// vector type is legal.
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
  struct IntrinsicInfo {
    unsigned     opc = 0;          // target opcode
    EVT          memVT;            // memory VT
@ -1085,6 +1095,33 @@ public:
    llvm_unreachable("Unsupported extended type!");
  }

+  /// Certain combinations of ABIs, Targets and features require that types
+  /// are legal for some operations and not for other operations.
+  /// For MIPS all vector types must be passed through the integer register set.
+  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
+    return getRegisterType(VT);
+  }
+
+  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                            EVT VT) const {
+    return getRegisterType(Context, VT);
+  }
+
+  /// Certain targets require unusual breakdowns of certain types. For MIPS,
+  /// this occurs when a vector type is used, as vector are passed through the
+  /// integer register set.
+  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                 EVT VT) const {
+    return getNumRegisters(Context, VT);
+  }
+
+  /// Certain targets have context senstive alignment requirements, where one
+  /// type has the alignment requirement of another type.
+  virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+                                                 DataLayout DL) const {
+    return DL.getABITypeAlignment(ArgTy);
+  }
+
  /// If true, then instruction selection should seek to shrink the FP constant
  /// of the specified type to a smaller type in order to save space and / or
  /// reduce runtime.
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -101,7 +101,8 @@ static const unsigned MaxParallelChains = 64;

 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V);
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy);

 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
@ -111,10 +112,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
-                                Optional<ISD::NodeType> AssertOp = None) {
+                                Optional<ISD::NodeType> AssertOp = None,
+                                bool IsABIRegCopy = false) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
-                                  PartVT, ValueVT, V);
+                                  PartVT, ValueVT, V, IsABIRegCopy);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@ -258,7 +260,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
 /// ValueVT (ISD::AssertSext).
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V) {
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@ -269,9 +272,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
-    unsigned NumRegs =
-    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
-                               NumIntermediates, RegisterVT);
+    unsigned NumRegs;
+
+    if (IsABIRegCopy) {
+      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+          RegisterVT);
+    } else {
+      NumRegs =
+          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                     NumIntermediates, RegisterVT);
+    }
+
    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@ -300,9 +312,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
+    EVT BuiltVectorTy =
+        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+                         (IntermediateVT.isVector()
+                              ? IntermediateVT.getVectorNumElements() * NumParts
+                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
-                      DL, ValueVT, Ops);
+                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
@ -341,13 +358,29 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

-  // Handle cases such as i8 -> <1 x i1>
  if (ValueVT.getVectorNumElements() != 1) {
-    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
-                                      "non-trivial scalar-to-vector conversion");
-    return DAG.getUNDEF(ValueVT);
+     // Certain ABIs require that vectors are passed as integers. For vectors
+     // are the same size, this is an obvious bitcast.
+     if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+     } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+       // Bitcast Val back the original type and extract the corresponding
+       // vector we want.
+       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+                                           ValueVT.getVectorElementType(), Elts);
+       Val = DAG.getBitcast(WiderVecType, Val);
+       return DAG.getNode(
+           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+     }
+
+     diagnosePossiblyInvalidConstraint(
+         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+     return DAG.getUNDEF(ValueVT);
  }

+  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
@ -358,7 +391,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,

 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V);
+                                 MVT PartVT, const Value *V, bool IsABIRegCopy);

 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
@ -366,12 +399,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
-                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+                           bool IsABIRegCopy = false) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+                                IsABIRegCopy);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
@ -496,7 +531,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
 /// value split into legal parts.
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V) {
+                                 MVT PartVT, const Value *V,
+                                 bool IsABIRegCopy) {
+
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@ -537,13 +574,20 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
-    } else{
-      // Vector -> scalar conversion.
-      assert(ValueVT.getVectorNumElements() == 1 &&
-             "Only trivial vector-to-scalar conversions should get here!");
-      Val = DAG.getNode(
-          ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
-          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    } else {
+      if (ValueVT.getVectorNumElements() == 1) {
+        Val = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+      } else {
+        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+               "lossy conversion of vector to scalar type");
+        EVT IntermediateType =
+            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+        Val = DAG.getBitcast(IntermediateType, Val);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
@ -555,15 +599,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
-  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                                                IntermediateVT,
-                                                NumIntermediates, RegisterVT);
+  unsigned NumRegs;
+  if (IsABIRegCopy) {
+    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+        RegisterVT);
+  } else {
+    NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+  }
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

+  // Convert the vector to the appropiate type if necessary.
+  unsigned DestVectorNoElts =
+      NumIntermediates *
+      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+  EVT BuiltVectorTy = EVT::getVectorVT(
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+  if (Val.getValueType() != BuiltVectorTy)
+    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
@ -596,22 +656,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
  }
 }

-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }

 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
-                           EVT valuevt)
-    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+                           EVT valuevt, bool IsABIMangledValue)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}

 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-                           const DataLayout &DL, unsigned Reg, Type *Ty) {
+                           const DataLayout &DL, unsigned Reg, Type *Ty,
+                           bool IsABIMangledValue) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

+  IsABIMangled = IsABIMangledValue;
+
  for (EVT ValueVT : ValueVTs) {
-    unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
-    MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+    unsigned NumRegs = IsABIMangledValue
+                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+                           : TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT = IsABIMangledValue
+                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+                         : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
+    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
 }
@ -632,8 +701,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumRegs = RegCount[Value];
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
@ -728,9 +799,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumParts = RegCount[Value];
+
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;
@ -953,10 +1026,16 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                     DAG.getDataLayout(), InReg, Ty);
+                     DAG.getDataLayout(), InReg, Ty, IsABIRegCopy);
    SDValue Chain = DAG.getEntryNode();
-    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

@ -1142,8 +1221,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
-                     Inst->getType());
+                     Inst->getType(), IsABIRegCopy);
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }
@ -1371,12 +1455,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

-        unsigned NumParts = TLI.getNumRegisters(Context, VT);
-        MVT PartVT = TLI.getRegisterType(Context, VT);
+        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@ -7112,8 +7196,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {

          SDLoc dl = getCurSDLoc();
          // Use the produced MatchedRegs object to
-          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
-                                    Chain, &Flag, CS.getInstruction());
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+                                    CS.getInstruction());
          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                           true, OpInfo.getMatchedOperand(), dl,
                                           DAG, AsmNodeOperands);
@ -7799,8 +7883,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
  } else {
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
        MyFlags.VT = RegisterVT;
@ -7849,7 +7935,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
      SDValue Op = SDValue(Args[i].Node.getNode(),
                           Args[i].Node.getResNo() + Value);
      ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);

      if (Args[i].IsZExt)
        Flags.setZExt();
@ -7904,8 +7994,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
        Flags.setInConsecutiveRegs();
      Flags.setOrigAlign(OriginalAlignment);

-      MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumParts =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
      SmallVector<SDValue, 4> Parts(NumParts);
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

@ -7935,7 +8026,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
      }

      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+                     true);

      for (unsigned j = 0; j != NumParts; ++j) {
        // if it isn't first piece, alignment must be 1
@ -8035,12 +8127,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
    unsigned CurReg = 0;
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);

      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                              NumRegs, RegisterVT, VT, nullptr,
-                                              AssertOp));
+                                              AssertOp, true));
      CurReg += NumRegs;
    }

@ -8076,8 +8170,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // If this is an InlineAsm we have to match the registers required, not the
+  // notional registers required by the type.
+  bool IsABIRegCopy =
+    V && ((isa<CallInst>(V) &&
+           !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+          isa<ReturnInst>(V));
+
  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
-                   V->getType());
+                   V->getType(), IsABIRegCopy);
  SDValue Chain = DAG.getEntryNode();

  ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@ -8319,7 +8420,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
      ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment =
+          TLI->getABIAlignmentForCallingConv(ArgTy, DL);

      if (Arg.hasAttribute(Attribute::ZExt))
        Flags.setZExt();
@ -8381,8 +8487,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
      if (ArgCopyElisionCandidates.count(&Arg))
        Flags.setCopyElisionCandidate();

-      MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT RegisterVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumRegs =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                              ArgNo, PartBase+i*RegisterVT.getStoreSize());
@ -8486,8 +8594,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {

    for (unsigned Val = 0; Val != NumValues; ++Val) {
      EVT VT = ValueVTs[Val];
-      MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT PartVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumParts =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);

      // Even an apparant 'unused' swifterror argument needs to be returned. So
      // we do generate a copy for it that can be used on return from the
@ -8500,7 +8610,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
          AssertOp = ISD::AssertZext;

        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
-                                             PartVT, VT, nullptr, AssertOp));
+                                             PartVT, VT, nullptr, AssertOp,
+                                             true));
      }

      i += NumParts;
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@ -975,18 +975,28 @@ struct RegsForValue {
  /// expanded value requires multiple registers.
  SmallVector<unsigned, 4> Regs;

+  /// This list holds the number of registers for each value.
+  SmallVector<unsigned, 4> RegCount;
+
+  /// Records if this value needs to be treated in an ABI dependant manner,
+  /// different to normal type legalization.
+  bool IsABIMangled;
+
  RegsForValue();

-  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+               bool IsABIMangledValue = false);

  RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-               const DataLayout &DL, unsigned Reg, Type *Ty);
+               const DataLayout &DL, unsigned Reg, Type *Ty,
+               bool IsABIMangledValue = false);

  /// Add the specified values to this one.
  void append(const RegsForValue &RHS) {
    ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
    RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
    Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    RegCount.push_back(RHS.Regs.size());
  }

  /// Emit a series of CopyFromReg nodes that copies from this value and returns
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@ -840,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
      //       completely and make statepoint call to return a tuple.
      unsigned Reg = FuncInfo.CreateRegs(RetTy);
      RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                       DAG.getDataLayout(), Reg, RetTy);
+                       DAG.getDataLayout(), Reg, RetTy, true);
      SDValue Chain = DAG.getEntryNode();

      RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@ -1637,8 +1637,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
        VT = MinVT;
    }

-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned NumParts =
+        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+    MVT PartVT =
+        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);

    // 'inreg' on function refers to return value
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
--- a/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/llvm/lib/Target/Mips/MipsCCState.cpp
@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) {
  return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
 }

+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+    return true;
+
+  return false;
+}
+
+/// Return true if the original type was vXfXX / vXfXX.
+static bool originalTypeIsVectorFloat(const Type * Ty) {
+  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+    return true;
+
+  return false;
+}
+
 MipsCCState::SpecialCallingConvType
 MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
                                            const MipsSubtarget &Subtarget) {
@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128(
  }
 }

-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
 void MipsCCState::PreAnalyzeReturnForF128(
    const SmallVectorImpl<ISD::OutputArg> &Outs) {
  const MachineFunction &MF = getMachineFunction();
@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128(
  }
 }

-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lower values that originated from vXfXX and record
 /// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+    const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
+  }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
+/// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+    const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    ISD::OutputArg Out = Outs[i];
+    OriginalRetWasFloatVector.push_back(
+        originalEVTTypeIsVectorFloat(Out.ArgVT));
+  }
+}
+
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
 void MipsCCState::PreAnalyzeCallOperands(
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    std::vector<TargetLowering::ArgListEntry> &FuncArgs,
    const char *Func) {
  for (unsigned i = 0; i < Outs.size(); ++i) {
-    OriginalArgWasF128.push_back(
-        originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func));
-    OriginalArgWasFloat.push_back(
-        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+    TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+    OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func));
+    OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+    OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
    CallOperandIsFixed.push_back(Outs[i].IsFixed);
  }
 }

-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
 void MipsCCState::PreAnalyzeFormalArgumentsForF128(
    const SmallVectorImpl<ISD::InputArg> &Ins) {
  const MachineFunction &MF = getMachineFunction();
@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
    if (Ins[i].Flags.isSRet()) {
      OriginalArgWasF128.push_back(false);
      OriginalArgWasFloat.push_back(false);
+      OriginalArgWasFloatVector.push_back(false);
      continue;
    }

@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
    OriginalArgWasF128.push_back(
        originalTypeIsF128(FuncArg->getType(), nullptr));
    OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+    // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the
+    // first argument is actually an SRet pointer to a vector, then the next
+    // argument slot is $a2.
+    OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
  }
 }
--- a/llvm/lib/Target/Mips/MipsCCState.h
+++ b/llvm/lib/Target/Mips/MipsCCState.h
@ -45,16 +45,33 @@ private:
                         const char *Func);

  /// Identify lowered values that originated from f128 arguments and record
-  /// this.
+  /// this for use by RetCC_MipsN.
  void
  PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);

+  void
+  PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     const Type *RetTy);
+
+  void PreAnalyzeFormalArgumentsForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
  /// Records whether the value has been lowered from an f128.
  SmallVector<bool, 4> OriginalArgWasF128;

  /// Records whether the value has been lowered from float.
  SmallVector<bool, 4> OriginalArgWasFloat;

+  /// Records whether the value has been lowered from a floating point vector.
+  SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+  /// Records whether the return value has been lowered from a floating point
+  /// vector.
+  SmallVector<bool, 4> OriginalRetWasFloatVector;
+
  /// Records whether the value was a fixed argument.
  /// See ISD::OutputArg::IsFixed,
  SmallVector<bool, 4> CallOperandIsFixed;
@ -78,6 +95,7 @@ public:
    CCState::AnalyzeCallOperands(Outs, Fn);
    OriginalArgWasF128.clear();
    OriginalArgWasFloat.clear();
+    OriginalArgWasFloatVector.clear();
    CallOperandIsFixed.clear();
  }

@ -96,31 +114,38 @@ public:
    CCState::AnalyzeFormalArguments(Ins, Fn);
    OriginalArgWasFloat.clear();
    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
  }

  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
                         CCAssignFn Fn, const Type *RetTy,
                         const char *Func) {
    PreAnalyzeCallResultForF128(Ins, RetTy, Func);
+    PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
    CCState::AnalyzeCallResult(Ins, Fn);
    OriginalArgWasFloat.clear();
    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
  }

  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                     CCAssignFn Fn) {
    PreAnalyzeReturnForF128(Outs);
+    PreAnalyzeReturnForVectorFloat(Outs);
    CCState::AnalyzeReturn(Outs, Fn);
    OriginalArgWasFloat.clear();
    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
  }

  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                   CCAssignFn Fn) {
    PreAnalyzeReturnForF128(ArgsFlags);
+    PreAnalyzeReturnForVectorFloat(ArgsFlags);
    bool Return = CCState::CheckReturn(ArgsFlags, Fn);
    OriginalArgWasFloat.clear();
    OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
    return Return;
  }

@ -128,6 +153,12 @@ public:
  bool WasOriginalArgFloat(unsigned ValNo) {
      return OriginalArgWasFloat[ValNo];
  }
+  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+    return OriginalArgWasFloatVector[ValNo];
+  }
+  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+    return OriginalRetWasFloatVector[ValNo];
+  }
  bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
  SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };
--- a/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/llvm/lib/Target/Mips/MipsCallingConv.td
@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A>
 class CCIfArgIsVarArg<CCAction A>
    : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;

+/// Match if the return was a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<MipsCCState *>(&State)"
+                "->WasOriginalRetVectorFloat(ValNo)", A>;

 /// Match if the special calling conv is the specified value.
 class CCIfSpecialCallingConv<string CC, CCAction A>
@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[
  // Promote i1/i8/i16 return values to i32.
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,

-  // i32 are returned in registers V0, V1, A0, A1
-  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+  // i32 are returned in registers V0, V1, A0, A1, unless the original return
+  // type was a vector of floats.
+  CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+                                        CCAssignToReg<[V0, V1, A0, A1]>>>,

  // f32 are returned in registers F0, F2
  CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  return true;
 }

+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used is dependant on the ABI used.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+  if (VT.isVector() && Subtarget.hasMSA())
+    return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+  return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      EVT VT) const {
+  if (VT.isVector()) {
+      if (Subtarget.isABI_O32()) {
+        return MVT::i32;
+      } else {
+        return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+      }
+  }
+  return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                           EVT VT) const {
+  if (VT.isVector())
+    return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+                    1U);
+  return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+  // Break down vector types to either 2 i64s or 4 i32s.
+  RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+  IntermediateVT = RegisterVT;
+  NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+                         ? VT.getVectorNumElements()
+                         : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+  return NumIntermediates;
+}
+
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
  MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
  return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@ -2552,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
 //       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
 //       not used, it must be shadowed. If only A3 is available, shadow it and
 //       go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+//         with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+//         spilling the remainder to the stack.
 //
 //  For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
 //===----------------------------------------------------------------------===//
@ -2563,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
      State.getMachineFunction().getSubtarget());

  static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+  const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
  static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };

+  static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
  // Do not process byval args here.
  if (ArgFlags.isByVal())
    return true;
@ -2602,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
                                State.getFirstUnallocated(F32Regs) != ValNo;
  unsigned OrigAlign = ArgFlags.getOrigAlign();
  bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+  bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);

-  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+  // The MIPS vector ABI for floats passes them in a pair of registers
+  if (ValVT == MVT::i32 && isVectorFloat) {
+    // This is the start of an vector that was scalarized into an unknown number
+    // of components. It doesn't matter how many there are. Allocate one of the
+    // notional 8 byte aligned registers which map onto the argument stack, and
+    // shadow the register lost to alignment requirements.
+    if (ArgFlags.isSplit()) {
+      Reg = State.AllocateReg(FloatVectorIntRegs);
+      if (Reg == Mips::A2)
+        State.AllocateReg(Mips::A1);
+      else if (Reg == 0)
+        State.AllocateReg(Mips::A3);
+    } else {
+      // If we're an intermediate component of the split, we can just attempt to
+      // allocate a register directly.
+      Reg = State.AllocateReg(IntRegs);
+    }
+  } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
    Reg = State.AllocateReg(IntRegs);
    // If this is the first part of an i64 arg,
    // the allocated register must be either A0 or A2.
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@ -248,6 +248,33 @@ namespace llvm {
    bool isCheapToSpeculateCttz() const override;
    bool isCheapToSpeculateCtlz() const override;

+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                              EVT VT) const override;
+
+    /// Return the number of registers for a given MVT, ensuring vectors are
+    /// treated as a series of gpr sized integers.
+    virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                   EVT VT) const override;
+
+    /// Break down vectors to the correct number of gpr sized integers.
+    virtual unsigned getVectorTypeBreakdownForCallingConv(
+        LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+        unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+    /// Return the correct alignment for the current calling convention.
+    virtual unsigned
+    getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+      if (ArgTy->isVectorTy())
+        return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+      return DL.getABITypeAlignment(ArgTy);
+    }
+
    ISD::NodeType getExtendForAtomicOps() const override {
      return ISD::SIGN_EXTEND;
    }
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,

  DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
               << "spOffset   : " << spOffset << "\n"
-               << "stackSize  : " << stackSize << "\n");
+               << "stackSize  : " << stackSize << "\n"
+               << "alignment  : "
+               << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n");

  eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
 }
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
--- a/llvm/test/CodeGen/Mips/ctlz-v.ll
+++ b/llvm/test/CodeGen/Mips/ctlz-v.ll
@ -8,10 +8,14 @@ entry:
 ; MIPS32: clz     $2, $4
 ; MIPS32: clz     $3, $5

-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]

  %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
  ret <2 x i32> %ret
--- a/llvm/test/CodeGen/Mips/cttz-v.ll
+++ b/llvm/test/CodeGen/Mips/cttz-v.ll
@ -24,14 +24,17 @@ entry:
 ; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
 ; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
 ; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr      $ra
-; MIPS64-DAG: subu    $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu    $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl    $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll     $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext    $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu   $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not     $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and     $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz     $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu    $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll    $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or      $2, $[[R8]], $[[R14]]

  %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
  ret <2 x i32> %ret
--- a/llvm/test/CodeGen/Mips/return-vector.ll
+++ b/llvm/test/CodeGen/Mips/return-vector.ll
@ -128,8 +128,11 @@ entry:

 ; CHECK-LABEL:        call_f2:
 ; CHECK:        call16(f2)
-; CHECK-NOT:    lwc1
-; CHECK:        add.s    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG:    lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK:        add.s    $f0, $f[[F0]], $f[[F1]]
+
 }


@ -143,12 +146,13 @@ entry:

 ; CHECK-LABEL:        call_d2:
 ; CHECK:        call16(d2)
-; CHECK-NOT:    ldc1
-; CHECK:        add.d    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG:    ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK:        add.d    $f0, $f[[F1]], $f[[F0]]
+
 }

-
-
 ; Check that function returns vector on stack in cases when vector can't be
 ; returned in registers. Also check that vector is placed on stack starting
 ; from the address in register $4.
@ -179,11 +183,12 @@ entry:
  ret <4 x float> %vecins4

 ; CHECK-LABEL:        return_f4:
-; CHECK-DAG:    lwc1    $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG:    swc1    $[[R0]], 12($4)
+; CHECK-DAG:    lwc1    $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG:    swc1    $f[[R0]], 12($4)
 ; CHECK-DAG:    sw      $7, 8($4)
 ; CHECK-DAG:    sw      $6, 4($4)
 ; CHECK-DAG:    sw      $5, 0($4)
+
 }


@ -227,8 +232,8 @@ entry:
  ret <2 x float> %vecins2

 ; CHECK-LABEL:        return_f2:
-; CHECK:        mov.s   $f0, $f12
-; CHECK:        mov.s   $f2, $f14
+; CHECK-DAG:    sw   $5, 0($4)
+; CHECK-DAG:    sw   $6, 4($4)
 }


@ -239,6 +244,10 @@ entry:
  ret <2 x double> %vecins2

 ; CHECK-LABEL:        return_d2:
-; CHECK:        mov.d   $f0, $f12
-; CHECK:        mov.d   $f2, $f14
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG:    sdc1 $f[[F0]], 8($4)
+; CHECK-DAG:    mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG:    mtc1 $7, $f
+; CHECK-DAG:    sdc1 $f[[F0]], 0($4)
+
 }