diff --git a/llvm/lib/Target/X86/README-X86-64.txt b/llvm/lib/Target/X86/README-X86-64.txt index 359b83d01a6e..594a66f832b6 100644 --- a/llvm/lib/Target/X86/README-X86-64.txt +++ b/llvm/lib/Target/X86/README-X86-64.txt @@ -236,3 +236,24 @@ on the result of the movb). //===---------------------------------------------------------------------===// +The x86-64 ABI for hidden-argument struct returns requires that the +incoming value of %rdi be copied into %rax by the callee upon return. + +The idea is that it saves callers from having to remember this value, +which would often require a callee-saved register. Callees usually +need to keep this value live for most of their body anyway, so it +doesn't add a significant burden on them. + +We currently implement this in codegen; however, this is suboptimal +because it means that it would be quite awkward to implement the +optimization for callers. + +A better implementation would be to relax the LLVM IR rules for sret +arguments to allow a function with an sret argument to have a non-void +return type, and to have the front-end set up the sret argument value +as the return value of the function. The front-end could more easily +emit uses of the returned struct value to be in terms of the function's +lowered return value, and it would free non-C frontends from a +complication only required by a C-based ABI. + +//===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3a4dc23b9a57..9ba1f53f88d9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -875,6 +875,25 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } + + // The x86-64 ABI for returning structs by value requires that we copy + // the sret argument into %rax for the return. 
We saved the argument into + // a virtual register in the entry block, so now we copy the value out + // and into %rax. + if (Subtarget->is64Bit() && + DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + unsigned Reg = FuncInfo->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + FuncInfo->setSRetReturnReg(Reg); + } + SDOperand Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy()); + + Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag); + Flag = Chain.getValue(1); + } RetOps[0] = Chain; // Update chain. @@ -1225,6 +1244,21 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { } } + // The x86-64 ABI for returning structs by value requires that we copy + // the sret argument into %rax for the return. Save the argument into + // a virtual register so that we can access it from the return points. + if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + unsigned Reg = FuncInfo->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + FuncInfo->setSRetReturnReg(Reg); + } + SDOperand Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]); + Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root); + } + unsigned StackSize = CCInfo.getNextStackOffset(); // align stack specially for tail calls if (CC == CallingConv::Fast) diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index fcdeb0572c35..b5c9cafeca82 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -53,20 +53,27 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// the returnaddr can be savely move to this area 
int TailCallReturnAddrDelta; + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), DecorationStyle(None), ReturnAddrIndex(0), - TailCallReturnAddrDelta(0) {} + TailCallReturnAddrDelta(0), + SRetReturnReg(0) {} X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), DecorationStyle(None), ReturnAddrIndex(0), - TailCallReturnAddrDelta(0) {} + TailCallReturnAddrDelta(0), + SRetReturnReg(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -85,6 +92,9 @@ public: int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; } void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/X86/x86-64-sret-return.ll b/llvm/test/CodeGen/X86/x86-64-sret-return.ll new file mode 100644 index 000000000000..9298661998b0 --- /dev/null +++ b/llvm/test/CodeGen/X86/x86-64-sret-return.ll @@ -0,0 +1,54 @@ +; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + %struct.foo = type { [4 x i64] } + +define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind { +entry: + %d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2] + %memtmp = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=1] + %"alloca point" = bitcast i32 0 to 
i32 ; [#uses=0] + store %struct.foo* %d, %struct.foo** %d_addr + %tmp = load %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1] + %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0 ; [#uses=1] + %tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0 ; [#uses=1] + %tmp5 = load i64* %tmp4, align 8 ; [#uses=1] + store i64 %tmp5, i64* %tmp3, align 8 + %tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1 ; [#uses=1] + %tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1 ; [#uses=1] + %tmp8 = load i64* %tmp7, align 8 ; [#uses=1] + store i64 %tmp8, i64* %tmp6, align 8 + %tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2 ; [#uses=1] + %tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2 ; [#uses=1] + %tmp11 = load i64* %tmp10, align 8 ; [#uses=1] + store i64 %tmp11, i64* %tmp9, align 8 + %tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3 ; [#uses=1] + %tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3 ; [#uses=1] + %tmp14 = load i64* %tmp13, align 8 ; [#uses=1] + store i64 %tmp14, i64* %tmp12, align 8 + %tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0 ; [#uses=1] + %tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0 ; [#uses=1] + %tmp19 = load i64* %tmp18, align 8 ; [#uses=1] + store i64 %tmp19, i64* %tmp17, align 8 + %tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1 ; [#uses=1] + %tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1 ; [#uses=1] + %tmp22 = load i64* %tmp21, align 8 ; [#uses=1] + store i64 %tmp22, i64* %tmp20, align 8 + %tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2 ; [#uses=1] + %tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2 ; [#uses=1] + %tmp25 = load i64* %tmp24, align 8 
; [#uses=1] + store i64 %tmp25, i64* %tmp23, align 8 + %tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3 ; [#uses=1] + %tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3 ; [#uses=1] + %tmp28 = load i64* %tmp27, align 8 ; [#uses=1] + store i64 %tmp28, i64* %tmp26, align 8 + br label %return + +return: ; preds = %entry + ret void +}