forked from OSchip/llvm-project
[AIX] Allow vararg calls when all arguments reside in registers
Summary: This patch pushes the AIX vararg unimplemented error diagnostic later and allows vararg calls so long as all the arguments can be passed in registers. This patch extends the AIX calling convention implementation to initialize GPR(s) for vararg float arguments. On AIX, both GPR(s) and FPR are allocated for floating point arguments. The GPR(s) are only initialized for vararg calls, otherwise the callee is expected to retrieve the float argument in the FPR. f64 in AIX PPC32 requires special handling in order to allocate and initialize 2 GPRs. This is performed with bitcast, SRL, truncation to initialize one GPR for the MSW and bitcast, truncations to initialize the other GPR for the LSW. A future patch will follow to add support for arguments passed on the stack. Patch provided by: cebowleratibm Reviewers: sfertile, ZarkoCA, hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D71013
This commit is contained in:
parent
ff92e469ca
commit
dfed052fb3
|
@ -6831,6 +6831,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
|||
if (ArgFlags.isNest())
|
||||
report_fatal_error("Nest arguments are unimplemented.");
|
||||
|
||||
if (ValVT.isVector() || LocVT.isVector())
|
||||
report_fatal_error("Vector arguments are unimplemented on AIX.");
|
||||
|
||||
const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
|
||||
State.getMachineFunction().getSubtarget());
|
||||
const bool IsPPC64 = Subtarget.isPPC64();
|
||||
|
@ -6875,18 +6878,33 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
|||
// This includes f64 in 64-bit mode for ABI compatibility.
|
||||
State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
|
||||
if (unsigned Reg = State.AllocateReg(FPR))
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::f64, LocInfo));
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||
else
|
||||
report_fatal_error("Handling of placing parameters on the stack is "
|
||||
"unimplemented!");
|
||||
|
||||
// f32 reserves 1 GPR in both PPC32 and PPC64.
|
||||
// f64 reserves 2 GPRs in PPC32 and 1 GPR in PPC64.
|
||||
for (unsigned i = 0; i < StoreSize; i += PtrByteSize)
|
||||
State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32);
|
||||
// AIX requires that GPRs are reserved for float arguments.
|
||||
// Successfully reserved GPRs are only initialized for vararg calls.
|
||||
MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
|
||||
for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
|
||||
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
|
||||
if (State.isVarArg()) {
|
||||
// Custom handling is required for:
|
||||
// f64 in PPC32 needs to be split into 2 GPRs.
|
||||
// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
|
||||
State.addLoc(
|
||||
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
|
||||
}
|
||||
} else if (State.isVarArg()) {
|
||||
report_fatal_error("Handling of placing parameters on the stack is "
|
||||
"unimplemented!");
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
|
||||
|
@ -7013,7 +7031,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
|
|||
CallConv == CallingConv::Cold ||
|
||||
CallConv == CallingConv::Fast) && "Unexpected calling convention!");
|
||||
|
||||
if (isVarArg || isPatchPoint)
|
||||
if (isPatchPoint)
|
||||
report_fatal_error("This call type is unimplemented on AIX.");
|
||||
|
||||
const PPCSubtarget& Subtarget =
|
||||
|
@ -7032,7 +7050,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
|
|||
// [SP][CR][LR][2 x reserved][TOC].
|
||||
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
|
||||
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
|
||||
const unsigned PtrByteSize = Subtarget.isPPC64() ? 8 : 4;
|
||||
const bool IsPPC64 = Subtarget.isPPC64();
|
||||
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
|
||||
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
|
||||
CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
|
||||
|
||||
|
@ -7052,26 +7071,70 @@ SDValue PPCTargetLowering::LowerCall_AIX(
|
|||
|
||||
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
|
||||
|
||||
for (CCValAssign &VA : ArgLocs) {
|
||||
SDValue Arg = OutVals[VA.getValNo()];
|
||||
|
||||
switch (VA.getLocInfo()) {
|
||||
default: report_fatal_error("Unexpected argument extension type.");
|
||||
case CCValAssign::Full: break;
|
||||
case CCValAssign::ZExt:
|
||||
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
|
||||
break;
|
||||
case CCValAssign::SExt:
|
||||
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
|
||||
break;
|
||||
}
|
||||
|
||||
if (VA.isRegLoc())
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
|
||||
CCValAssign &VA = ArgLocs[I++];
|
||||
|
||||
if (VA.isMemLoc())
|
||||
report_fatal_error("Handling of placing parameters on the stack is "
|
||||
"unimplemented!");
|
||||
if (!VA.isRegLoc())
|
||||
report_fatal_error(
|
||||
"Unexpected non-register location for function call argument.");
|
||||
|
||||
SDValue Arg = OutVals[VA.getValNo()];
|
||||
|
||||
if (!VA.needsCustom()) {
|
||||
switch (VA.getLocInfo()) {
|
||||
default:
|
||||
report_fatal_error("Unexpected argument extension type.");
|
||||
case CCValAssign::Full:
|
||||
break;
|
||||
case CCValAssign::ZExt:
|
||||
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
|
||||
break;
|
||||
case CCValAssign::SExt:
|
||||
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
|
||||
break;
|
||||
}
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Custom handling is used for GPR initializations for vararg float
|
||||
// arguments.
|
||||
assert(isVarArg && VA.getValVT().isFloatingPoint() &&
|
||||
VA.getLocVT().isInteger() &&
|
||||
"Unexpected custom register handling for calling convention.");
|
||||
|
||||
SDValue ArgAsInt =
|
||||
DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
|
||||
|
||||
if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
|
||||
// f32 in 32-bit GPR
|
||||
// f64 in 64-bit GPR
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
|
||||
else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
|
||||
// f32 in 64-bit GPR.
|
||||
RegsToPass.push_back(std::make_pair(
|
||||
VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
|
||||
else {
|
||||
// f64 in two 32-bit GPRs
|
||||
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
|
||||
assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
|
||||
"Unexpected custom register for argument!");
|
||||
CCValAssign &GPR1 = VA;
|
||||
SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
|
||||
DAG.getConstant(32, dl, MVT::i8));
|
||||
RegsToPass.push_back(std::make_pair(
|
||||
GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
|
||||
assert(I != E && "A second custom GPR is expected!");
|
||||
CCValAssign &GPR2 = ArgLocs[I++];
|
||||
assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
|
||||
GPR2.needsCustom() && "A second custom GPR is expected!");
|
||||
RegsToPass.push_back(std::make_pair(
|
||||
GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
|
||||
}
|
||||
}
|
||||
|
||||
// For indirect calls, we need to save the TOC base to the stack for
|
||||
|
|
|
@ -1,9 +1,17 @@
|
|||
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
|
||||
; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \
|
||||
; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s
|
||||
|
||||
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
|
||||
; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \
|
||||
; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
|
||||
|
||||
define void @call_test_chars() {
|
||||
entry:
|
||||
call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97)
|
||||
|
@ -148,7 +156,6 @@ entry:
|
|||
call void @test_i1(i1 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: call_test_i1
|
||||
|
||||
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
@ -251,7 +258,6 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: name: call_test_i64
|
||||
|
||||
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
@ -612,3 +618,263 @@ entry:
|
|||
; 64BIT: body: |
|
||||
; 64BIT-NEXT: bb.0.entry:
|
||||
; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7
|
||||
|
||||
define void @call_test_vararg() {
|
||||
entry:
|
||||
%0 = load float, float* @f1, align 4
|
||||
%conv = fpext float %0 to double
|
||||
%1 = load double, double* @d1, align 8
|
||||
call void (i32, ...) @test_vararg(i32 42, double %conv, double %1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @test_vararg(i32, ...)
|
||||
|
||||
; CHECK-LABEL: name: call_test_vararg
|
||||
|
||||
; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
|
||||
; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
|
||||
; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
|
||||
; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
|
||||
; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: $r3 = LI 42
|
||||
; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1
|
||||
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; CHECKASM-LABEL: .call_test_vararg:
|
||||
|
||||
; ASM32PWR4: stwu 1, -80(1)
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
|
||||
; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
|
||||
; ASM32PWR4-NEXT: stfd 1, 64(1)
|
||||
; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: li 3, 42
|
||||
; ASM32PWR4-NEXT: stfd 2, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 4, 64(1)
|
||||
; ASM32PWR4-DAG: lwz 5, 68(1)
|
||||
; ASM32PWR4-DAG: lwz 6, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 7, 76(1)
|
||||
; ASM32PWR4-NEXT: bl .test_vararg
|
||||
; ASM32PWR4-NEXT: nop
|
||||
|
||||
; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: $x3 = LI8 42
|
||||
; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1
|
||||
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM64PWR4: stdu 1, -128(1)
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
|
||||
; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
|
||||
; ASM64PWR4-NEXT: stfd 1, 112(1)
|
||||
; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: li 3, 42
|
||||
; ASM64PWR4-NEXT: stfd 2, 120(1)
|
||||
; ASM64PWR4-NEXT: ld 4, 112(1)
|
||||
; ASM64PWR4-NEXT: ld 5, 120(1)
|
||||
; ASM64PWR4-NEXT: bl .test_vararg
|
||||
; ASM64PWR4-NEXT: nop
|
||||
|
||||
define void @call_test_vararg2() {
|
||||
entry:
|
||||
%0 = load float, float* @f1, align 4
|
||||
%conv = fpext float %0 to double
|
||||
%1 = load double, double* @d1, align 8
|
||||
call void (i32, ...) @test_vararg(i32 42, double %conv, i32 42, double %1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: call_test_vararg2
|
||||
|
||||
; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
|
||||
; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
|
||||
; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
|
||||
; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
|
||||
; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: $r3 = LI 42
|
||||
; 32BIT-NEXT: $r6 = LI 42
|
||||
; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1
|
||||
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM32PWR4: stwu 1, -80(1)
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
|
||||
; ASM32PWR4-NEXT: li 6, 42
|
||||
; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
|
||||
; ASM32PWR4-NEXT: stfd 1, 64(1)
|
||||
; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: li 3, 42
|
||||
; ASM32PWR4-NEXT: stfd 2, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 4, 64(1)
|
||||
; ASM32PWR4-DAG: lwz 5, 68(1)
|
||||
; ASM32PWR4-DAG: lwz 7, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 8, 76(1)
|
||||
; ASM32PWR4-NEXT: bl .test_vararg
|
||||
; ASM32PWR4-NEXT: nop
|
||||
|
||||
; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: $x3 = LI8 42
|
||||
; 64BIT-NEXT: $x5 = LI8 42
|
||||
; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1
|
||||
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM64PWR4: stdu 1, -128(1)
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
|
||||
; ASM64PWR4-NEXT: li 5, 42
|
||||
; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
|
||||
; ASM64PWR4-NEXT: stfd 1, 112(1)
|
||||
; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: li 3, 42
|
||||
; ASM64PWR4-NEXT: stfd 2, 120(1)
|
||||
; ASM64PWR4-NEXT: ld 4, 112(1)
|
||||
; ASM64PWR4-NEXT: ld 6, 120(1)
|
||||
; ASM64PWR4-NEXT: bl .test_vararg
|
||||
; ASM64PWR4-NEXT: nop
|
||||
|
||||
define void @call_test_vararg3() {
|
||||
entry:
|
||||
%0 = load float, float* @f1, align 4
|
||||
%conv = fpext float %0 to double
|
||||
%1 = load double, double* @d1, align 8
|
||||
call void (i32, ...) @test_vararg(i32 42, double %conv, i64 42, double %1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: call_test_vararg3
|
||||
|
||||
; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
|
||||
; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
|
||||
; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
|
||||
; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
|
||||
; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: $r3 = LI 42
|
||||
; 32BIT-NEXT: $r6 = LI 0
|
||||
; 32BIT-NEXT: $r7 = LI 42
|
||||
; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1
|
||||
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM32PWR4: stwu 1, -80(1)
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
|
||||
; ASM32PWR4-DAG: li 6, 0
|
||||
; ASM32PWR4-DAG: li 7, 42
|
||||
; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
|
||||
; ASM32PWR4-NEXT: stfd 1, 64(1)
|
||||
; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: li 3, 42
|
||||
; ASM32PWR4-NEXT: stfd 2, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 4, 64(1)
|
||||
; ASM32PWR4-DAG: lwz 5, 68(1)
|
||||
; ASM32PWR4-DAG: lwz 8, 72(1)
|
||||
; ASM32PWR4-DAG: lwz 9, 76(1)
|
||||
; ASM32PWR4-NEXT: bl .test_vararg
|
||||
; ASM32PWR4-NEXT: nop
|
||||
|
||||
; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
|
||||
; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
|
||||
; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
|
||||
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: $x3 = LI8 42
|
||||
; 64BIT-NEXT: $x5 = LI8 42
|
||||
; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1
|
||||
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM64PWR4: stdu 1, -128(1)
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
|
||||
; ASM64PWR4-NEXT: li 5, 42
|
||||
; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
|
||||
; ASM64PWR4-NEXT: stfd 1, 112(1)
|
||||
; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: li 3, 42
|
||||
; ASM64PWR4-NEXT: stfd 2, 120(1)
|
||||
; ASM64PWR4-DAG: ld 4, 112(1)
|
||||
; ASM64PWR4-DAG: ld 6, 120(1)
|
||||
; ASM64PWR4-NEXT: bl .test_vararg
|
||||
; ASM64PWR4-NEXT: nop
|
||||
|
||||
define void @call_test_vararg4() {
|
||||
entry:
|
||||
%0 = load float, float* @f1, align 4
|
||||
call void (i32, ...) @test_vararg(i32 42, float %0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: call_test_vararg4
|
||||
|
||||
; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
|
||||
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]])
|
||||
; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]])
|
||||
; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: $r3 = LI 42
|
||||
; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1
|
||||
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM32PWR4: stwu 1, -64(1)
|
||||
; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
|
||||
; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM32PWR4-NEXT: li 3, 42
|
||||
; ASM32PWR4-NEXT: stfs 1, 60(1)
|
||||
; ASM32PWR4-NEXT: lwz 4, 60(1)
|
||||
; ASM32PWR4-NEXT: bl .test_vararg
|
||||
; ASM32PWR4-NEXT: nop
|
||||
|
||||
; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
|
||||
; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
|
||||
; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]])
|
||||
; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]])
|
||||
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: $x3 = LI8 42
|
||||
; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1
|
||||
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
|
||||
|
||||
; ASM64PWR4: stdu 1, -128(1)
|
||||
; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
|
||||
; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
|
||||
; ASM64PWR4-NEXT: li 3, 42
|
||||
; ASM64PWR4-NEXT: stfs 1, 124(1)
|
||||
; ASM64PWR4-NEXT: lwz 4, 124(1)
|
||||
; ASM64PWR4-NEXT: bl .test_vararg
|
||||
; ASM64PWR4-NEXT: nop
|
|
@ -0,0 +1,23 @@
|
|||
; RUN: not llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
|
||||
; RUN: not llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
|
||||
|
||||
; This test expects a compiler diagnostic for an AIX limitation on Altivec
|
||||
; support. When the Altivec limitation diagnostic is removed, this test
|
||||
; should compile clean and fail in order to alert the author to validate the
|
||||
; instructions emitted to initialize the GPR for the double vararg.
|
||||
; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for
|
||||
; the double vararg without going through memory.
|
||||
|
||||
@f1 = global float 0.000000e+00, align 4
|
||||
|
||||
define void @call_test_vararg() {
|
||||
entry:
|
||||
%0 = load float, float* @f1, align 4
|
||||
%conv = fpext float %0 to double
|
||||
call void (i32, ...) @test_vararg(i32 42, double %conv, float %0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @test_vararg(i32, ...)
|
||||
|
||||
; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX.
|
Loading…
Reference in New Issue