forked from OSchip/llvm-project
Implement an x86-64 ABI detail of passing structs by hidden first
argument. The x86-64 ABI requires the incoming value of %rdi to be copied to %rax on exit from a function that is returning a large C struct. Also, add a README-X86-64 entry detailing the missed optimization opportunity and proposing an alternative approach. llvm-svn: 50075
This commit is contained in:
parent
a591a12ea5
commit
f166d2d0d6
|
@ -236,3 +236,24 @@ on the result of the movb).
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
The x86-64 ABI for hidden-argument struct returns requires that the
|
||||||
|
incoming value of %rdi be copied into %rax by the callee upon return.
|
||||||
|
|
||||||
|
The idea is that it saves callers from having to remember this value,
|
||||||
|
which would often require a callee-saved register. Callees usually
|
||||||
|
need to keep this value live for most of their body anyway, so it
|
||||||
|
doesn't add a significant burden on them.
|
||||||
|
|
||||||
|
We currently implement this in codegen; however, this is suboptimal
|
||||||
|
because it means that it would be quite awkward to implement the
|
||||||
|
optimization for callers.
|
||||||
|
|
||||||
|
A better implementation would be to relax the LLVM IR rules for sret
|
||||||
|
arguments to allow a function with an sret argument to have a non-void
|
||||||
|
return type, and to have the front-end set up the sret argument value
|
||||||
|
as the return value of the function. The front-end could more easily
|
||||||
|
emit uses of the returned struct value to be in terms of the function's
|
||||||
|
lowered return value, and it would free non-C frontends from a
|
||||||
|
complication only required by a C-based ABI.
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
|
@ -876,6 +876,25 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
||||||
Flag = Chain.getValue(1);
|
Flag = Chain.getValue(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The x86-64 ABI for returning structs by value requires that we copy
|
||||||
|
// the sret argument into %rax for the return. We saved the argument into
|
||||||
|
// a virtual register in the entry block, so now we copy the value out
|
||||||
|
// and into %rax.
|
||||||
|
if (Subtarget->is64Bit() &&
|
||||||
|
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
|
||||||
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
|
||||||
|
unsigned Reg = FuncInfo->getSRetReturnReg();
|
||||||
|
if (!Reg) {
|
||||||
|
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
|
||||||
|
FuncInfo->setSRetReturnReg(Reg);
|
||||||
|
}
|
||||||
|
SDOperand Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());
|
||||||
|
|
||||||
|
Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
|
||||||
|
Flag = Chain.getValue(1);
|
||||||
|
}
|
||||||
|
|
||||||
RetOps[0] = Chain; // Update chain.
|
RetOps[0] = Chain; // Update chain.
|
||||||
|
|
||||||
// Add the flag if we have it.
|
// Add the flag if we have it.
|
||||||
|
@ -1225,6 +1244,21 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The x86-64 ABI for returning structs by value requires that we copy
|
||||||
|
// the sret argument into %rax for the return. Save the argument into
|
||||||
|
// a virtual register so that we can access it from the return points.
|
||||||
|
if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
|
||||||
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
|
||||||
|
unsigned Reg = FuncInfo->getSRetReturnReg();
|
||||||
|
if (!Reg) {
|
||||||
|
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
|
||||||
|
FuncInfo->setSRetReturnReg(Reg);
|
||||||
|
}
|
||||||
|
SDOperand Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
|
||||||
|
Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned StackSize = CCInfo.getNextStackOffset();
|
unsigned StackSize = CCInfo.getNextStackOffset();
|
||||||
// align stack specially for tail calls
|
// align stack specially for tail calls
|
||||||
if (CC == CallingConv::Fast)
|
if (CC == CallingConv::Fast)
|
||||||
|
|
|
@ -53,20 +53,27 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
||||||
/// the returnaddr can be safely moved to this area
|
/// the returnaddr can be safely moved to this area
|
||||||
int TailCallReturnAddrDelta;
|
int TailCallReturnAddrDelta;
|
||||||
|
|
||||||
|
/// SRetReturnReg - Some subtargets require that sret lowering includes
|
||||||
|
/// returning the value of the returned struct in a register. This field
|
||||||
|
/// holds the virtual register into which the sret argument is passed.
|
||||||
|
unsigned SRetReturnReg;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
X86MachineFunctionInfo() : ForceFramePointer(false),
|
X86MachineFunctionInfo() : ForceFramePointer(false),
|
||||||
CalleeSavedFrameSize(0),
|
CalleeSavedFrameSize(0),
|
||||||
BytesToPopOnReturn(0),
|
BytesToPopOnReturn(0),
|
||||||
DecorationStyle(None),
|
DecorationStyle(None),
|
||||||
ReturnAddrIndex(0),
|
ReturnAddrIndex(0),
|
||||||
TailCallReturnAddrDelta(0) {}
|
TailCallReturnAddrDelta(0),
|
||||||
|
SRetReturnReg(0) {}
|
||||||
|
|
||||||
X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
|
X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
|
||||||
CalleeSavedFrameSize(0),
|
CalleeSavedFrameSize(0),
|
||||||
BytesToPopOnReturn(0),
|
BytesToPopOnReturn(0),
|
||||||
DecorationStyle(None),
|
DecorationStyle(None),
|
||||||
ReturnAddrIndex(0),
|
ReturnAddrIndex(0),
|
||||||
TailCallReturnAddrDelta(0) {}
|
TailCallReturnAddrDelta(0),
|
||||||
|
SRetReturnReg(0) {}
|
||||||
|
|
||||||
bool getForceFramePointer() const { return ForceFramePointer;}
|
bool getForceFramePointer() const { return ForceFramePointer;}
|
||||||
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
||||||
|
@ -85,6 +92,9 @@ public:
|
||||||
|
|
||||||
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
|
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
|
||||||
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
|
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
|
||||||
|
|
||||||
|
unsigned getSRetReturnReg() const { return SRetReturnReg; }
|
||||||
|
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
|
||||||
};
|
};
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
|
target triple = "x86_64-apple-darwin8"
|
||||||
|
%struct.foo = type { [4 x i64] }
|
||||||
|
|
||||||
|
define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind {
|
||||||
|
entry:
|
||||||
|
%d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2]
|
||||||
|
%memtmp = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=1]
|
||||||
|
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
|
||||||
|
store %struct.foo* %d, %struct.foo** %d_addr
|
||||||
|
%tmp = load %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1]
|
||||||
|
%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
|
||||||
|
%tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
|
||||||
|
%tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0 ; <i64*> [#uses=1]
|
||||||
|
%tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0 ; <i64*> [#uses=1]
|
||||||
|
%tmp5 = load i64* %tmp4, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp5, i64* %tmp3, align 8
|
||||||
|
%tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1 ; <i64*> [#uses=1]
|
||||||
|
%tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1]
|
||||||
|
%tmp8 = load i64* %tmp7, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp8, i64* %tmp6, align 8
|
||||||
|
%tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2 ; <i64*> [#uses=1]
|
||||||
|
%tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2 ; <i64*> [#uses=1]
|
||||||
|
%tmp11 = load i64* %tmp10, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp11, i64* %tmp9, align 8
|
||||||
|
%tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3 ; <i64*> [#uses=1]
|
||||||
|
%tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3 ; <i64*> [#uses=1]
|
||||||
|
%tmp14 = load i64* %tmp13, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp14, i64* %tmp12, align 8
|
||||||
|
%tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
|
||||||
|
%tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
|
||||||
|
%tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0 ; <i64*> [#uses=1]
|
||||||
|
%tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0 ; <i64*> [#uses=1]
|
||||||
|
%tmp19 = load i64* %tmp18, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp19, i64* %tmp17, align 8
|
||||||
|
%tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1 ; <i64*> [#uses=1]
|
||||||
|
%tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1 ; <i64*> [#uses=1]
|
||||||
|
%tmp22 = load i64* %tmp21, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp22, i64* %tmp20, align 8
|
||||||
|
%tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2 ; <i64*> [#uses=1]
|
||||||
|
%tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2 ; <i64*> [#uses=1]
|
||||||
|
%tmp25 = load i64* %tmp24, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp25, i64* %tmp23, align 8
|
||||||
|
%tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3 ; <i64*> [#uses=1]
|
||||||
|
%tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3 ; <i64*> [#uses=1]
|
||||||
|
%tmp28 = load i64* %tmp27, align 8 ; <i64> [#uses=1]
|
||||||
|
store i64 %tmp28, i64* %tmp26, align 8
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return: ; preds = %entry
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue