Allow sret on the second parameter as well as the first

MSVC always places the implicit sret parameter after the implicit this parameter of instance methods. We used to handle this for x86_thiscallcc by allocating the sret parameter on the stack and leaving the this pointer in ecx, but that doesn't handle alternative calling conventions like cdecl, stdcall, fastcall, or the win64 convention. Instead, change the verifier to allow sret on the second parameter. This also requires changing the Mips and X86 backends to return the argument with the sret parameter, instead of assuming that the sret parameter comes first. The Sparc backend also returns sret parameters in a register, but I wasn't able to update it to handle secondary sret parameters. It currently calls report_fatal_error if you feed it an sret in the second parameter. Reviewers: rafael.espindola, majnemer Differential Revision: http://reviews.llvm.org/D3617 llvm-svn: 208453
2014-05-09 22:32:13 +00:00 · 2014-05-09 22:32:13 +00:00 · 7941856445
parent 629d201c97
commit 7941856445
10 changed files with 139 additions and 47 deletions
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@ -297,7 +297,8 @@ public:
  /// @brief Determine if the function returns a structure through first
  /// pointer argument.
  bool hasStructRetAttr() const {
-    return AttributeSets.hasAttribute(1, Attribute::StructRet);
+    return AttributeSets.hasAttribute(1, Attribute::StructRet) ||
+           AttributeSets.hasAttribute(2, Attribute::StructRet);
  }

  /// @brief Determine if the parameter does not alias other parameters.
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@ -207,10 +207,10 @@ void Function::eraseFromParent() {
 // Function Implementation
 //===----------------------------------------------------------------------===//

-Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
-                   Module *ParentModule)
-    : GlobalValue(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0,
-                  Linkage, name) {
+Function::Function(FunctionType *Ty, LinkageTypes Linkage,
+                   const Twine &name, Module *ParentModule)
+  : GlobalValue(PointerType::getUnqual(Ty),
+                Value::FunctionVal, nullptr, 0, Linkage, name) {
  assert(FunctionType::isValidReturnType(getReturnType()) &&
         "invalid return type");
  SymTab = new ValueSymbolTable();
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@ -820,6 +820,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,

  bool SawNest = false;
  bool SawReturned = false;
+  bool SawSRet = false;

  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
    unsigned Idx = Attrs.getSlotIndex(i);
@ -850,8 +851,12 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
      SawReturned = true;
    }

-    if (Attrs.hasAttribute(Idx, Attribute::StructRet))
-      Assert1(Idx == 1, "Attribute sret is not on first parameter!", V);
+    if (Attrs.hasAttribute(Idx, Attribute::StructRet)) {
+      Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+      Assert1(Idx == 1 || Idx == 2,
+              "Attribute 'sret' is not on first or second parameter!", V);
+      SawSRet = true;
+    }

    if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) {
      Assert1(Idx == FT->getNumParams(),
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@ -2659,20 +2659,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
      InVals.push_back(Load);
      OutChains.push_back(Load.getValue(1));
    }
-  }

-  // The mips ABIs for returning structs by value requires that we copy
-  // the sret argument into $v0 for the return. Save the argument into
-  // a virtual register so that we can access it from the return points.
-  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
-    unsigned Reg = MipsFI->getSRetReturnReg();
-    if (!Reg) {
-      Reg = MF.getRegInfo().createVirtualRegister(
-          getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
-      MipsFI->setSRetReturnReg(Reg);
+    // The mips ABIs for returning structs by value requires that we copy
+    // the sret argument into $v0 for the return. Save the argument into
+    // a virtual register so that we can access it from the return points.
+    if (Flags.isSRet()) {
+      unsigned Reg = MipsFI->getSRetReturnReg();
+      if (!Reg) {
+        Reg = MF.getRegInfo().createVirtualRegister(
+            getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+        MipsFI->setSRetReturnReg(Reg);
+      }
+      SDValue Copy =
+          DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals.back());
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
    }
-    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
  }

  if (IsVarArg)
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@ -355,10 +355,13 @@ LowerFormalArguments_32(SDValue Chain,

  const unsigned StackOffset = 92;

-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+  unsigned InIdx = 0;
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
    CCValAssign &VA = ArgLocs[i];

-    if (i == 0  && Ins[i].Flags.isSRet()) {
+    if (Ins[InIdx].Flags.isSRet()) {
+      if (InIdx != 0)
+        report_fatal_error("sparc only supports sret on the first parameter");
      // Get SRet from [%fp+64].
      int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
      SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -2299,24 +2299,24 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                             MachinePointerInfo(), false, false, false, 0);

    InVals.push_back(ArgValue);
-  }

-  // The x86-64 ABIs require that for returning structs by value we copy
-  // the sret argument into %rax/%eax (depending on ABI) for the return.
-  // Win32 requires us to put the sret argument to %eax as well.
-  // Save the argument into a virtual register so that we can access it
-  // from the return points.
-  if (MF.getFunction()->hasStructRetAttr() &&
-      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
-    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-    unsigned Reg = FuncInfo->getSRetReturnReg();
-    if (!Reg) {
-      MVT PtrTy = getPointerTy();
-      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
-      FuncInfo->setSRetReturnReg(Reg);
+    // The x86-64 ABIs require that for returning structs by value we copy
+    // the sret argument into %rax/%eax (depending on ABI) for the return.
+    // Win32 requires us to put the sret argument to %eax as well.
+    // Save the argument into a virtual register so that we can access it
+    // from the return points.
+    if (Ins[i].Flags.isSRet() &&
+        (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+      unsigned Reg = FuncInfo->getSRetReturnReg();
+      if (!Reg) {
+        MVT PtrTy = getPointerTy();
+        Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+        FuncInfo->setSRetReturnReg(Reg);
+      }
+      SDValue Copy =
+          DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals.back());
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
    }
-    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
--- a/llvm/test/CodeGen/Mips/mips64-sret.ll
+++ b/llvm/test/CodeGen/Mips/mips64-sret.ll
@ -1,16 +1,23 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s

-%struct.S = type { [8 x i32] }
-
-@g = common global %struct.S zeroinitializer, align 4
-
-define void @f(%struct.S* noalias sret %agg.result) nounwind {
+define void @foo(i32* noalias sret %agg.result) nounwind {
 entry:
-; CHECK: move $2, $4
+; CHECK-LABEL: foo:
+; CHECK: sw {{.*}}, 0($4)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $4

-  %0 = bitcast %struct.S* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
+  store i32 42, i32* %agg.result
  ret void
 }

-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
+entry:
+; CHECK-LABEL: bar:
+; CHECK: sw $4, 0($5)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $5
+
+  store i32 %v, i32* %agg.result
+  ret void
+}
--- a/llvm/test/CodeGen/SPARC/sret-secondary.ll
+++ b/llvm/test/CodeGen/SPARC/sret-secondary.ll
@ -0,0 +1,8 @@
+; RUN: not llc -march=sparc < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: sparc only supports sret on the first parameter
+
+define void @foo(i32 %a, i32* sret %out) {
+  store i32 %a, i32* %out
+  ret void
+}
--- a/llvm/test/CodeGen/X86/win32_sret.ll
+++ b/llvm/test/CodeGen/X86/win32_sret.ll
@ -181,3 +181,63 @@ define void @test6_f(%struct.test6* %x) nounwind {
  ret void
 }
 declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)
+
+; Flipping the parameters at the IR level generates the same code.
+%struct.test7 = type { i32, i32, i32 }
+define void @test7_f(%struct.test7* %x) nounwind {
+; WIN32-LABEL: _test7_f:
+; MINGW_X86-LABEL: _test7_f:
+; CYGWIN-LABEL: _test7_f:
+; LINUX-LABEL: test7_f:
+
+; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
+; WIN32:      movl    8(%ebp), %ecx
+; MINGW_X86:  movl    8(%ebp), %ecx
+; CYGWIN:     movl    8(%ebp), %ecx
+
+; The sret pointer is (%esp)
+; WIN32:          leal    8(%esp), %[[REG:e[a-d]x]]
+; WIN32-NEXT:     movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+; MINGW_X86:      leal    8(%esp), %[[REG:e[a-d]x]]
+; MINGW_X86-NEXT: movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+; CYGWIN:         leal    8(%esp), %[[REG:e[a-d]x]]
+; CYGWIN-NEXT:    movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+
+  %tmp = alloca %struct.test7, align 4
+  call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
+  ret void
+}
+
+define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
+  %s = getelementptr %struct.test7* %in, i32 0, i32 0
+  %d = getelementptr %struct.test7* %out, i32 0, i32 0
+  %v = load i32* %s
+  store i32 %v, i32* %d
+  call void @clobber_eax()
+  ret void
+
+; Make sure we return the second parameter in %eax.
+; WIN32-LABEL: _test7_g:
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}
+
+declare void @clobber_eax()
+
+; Test what happens if the first parameter has to be split by codegen.
+; Realistically, no frontend will generate code like this, but here it is for
+; completeness.
+define void @test8_f(i64 inreg %a, i64* sret %out) {
+  store i64 %a, i64* %out
+  call void @clobber_eax()
+  ret void
+
+; WIN32-LABEL: _test8_f:
+; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
+; WIN32-DAG: movl %edx, 4(%[[out]])
+; WIN32-DAG: movl %eax, (%[[out]])
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}
--- a/llvm/test/Verifier/sret.ll
+++ b/llvm/test/Verifier/sret.ll
@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @a(i32* sret %a, i32* sret %b)
+; CHECK: Cannot have multiple 'sret' parameters!
+
+declare void @b(i32* %a, i32* %b, i32* sret %c)
+; CHECK: Attribute 'sret' is not on first or second parameter!