forked from OSchip/llvm-project
Fix a fastcc + sret bug: when a function is both fastcc and sret, the callee does not need to pop the hidden struct pointer. Re-enable fastcc.
llvm-svn: 56061
This commit is contained in:
parent
eece3fe4fd
commit
710c3cf36a
|
@ -1103,6 +1103,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDValue Op) const {
|
|||
return CC_X86_32_FastCall;
|
||||
else if (CC == CallingConv::Fast && PerformTailCallOpt)
|
||||
return CC_X86_32_TailCall;
|
||||
else if (CC == CallingConv::Fast)
|
||||
return CC_X86_32_FastCC;
|
||||
else
|
||||
return CC_X86_32_C;
|
||||
}
|
||||
|
@ -1391,7 +1393,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
|
|||
} else {
|
||||
BytesToPopOnReturn = 0; // Callee pops nothing.
|
||||
// If this is an sret function, the return should pop the hidden pointer.
|
||||
if (!Is64Bit && ArgsAreStructReturn(Op))
|
||||
if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
|
||||
BytesToPopOnReturn = 4;
|
||||
BytesCallerReserves = StackSize;
|
||||
}
|
||||
|
@ -1773,7 +1775,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
|
|||
unsigned NumBytesForCalleeToPush;
|
||||
if (IsCalleePop(Op))
|
||||
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
|
||||
else if (!Is64Bit && IsStructRet)
|
||||
else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
|
||||
// If this is a call to a struct-return function, the callee
|
||||
// pops the hidden struct pointer, so we have to push it back.
|
||||
// This is common for Darwin/X86, Linux & Mingw32 targets.
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 | grep ret | not grep 4
|
||||
|
||||
%struct.foo = type { [4 x i32] }
|
||||
|
||||
define fastcc void @bar(%struct.foo* noalias sret %agg.result) nounwind {
|
||||
entry:
|
||||
%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
|
||||
%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
|
||||
store i32 1, i32* %tmp3, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
@dst = external global i32
|
||||
|
||||
define void @foo() nounwind {
|
||||
%memtmp = alloca %struct.foo, align 4
|
||||
call fastcc void @bar( %struct.foo* sret %memtmp ) nounwind
|
||||
%tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
|
||||
%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
|
||||
%tmp6 = load i32* %tmp5
|
||||
store i32 %tmp6, i32* @dst
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 | grep ret | grep 4
|
||||
|
||||
%struct.foo = type { [4 x i32] }
|
||||
|
||||
define void @bar(%struct.foo* noalias sret %agg.result) nounwind {
|
||||
entry:
|
||||
%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
|
||||
%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
|
||||
store i32 1, i32* %tmp3, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
@dst = external global i32
|
||||
|
||||
define void @foo() nounwind {
|
||||
%memtmp = alloca %struct.foo, align 4
|
||||
call void @bar( %struct.foo* sret %memtmp ) nounwind
|
||||
%tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
|
||||
%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
|
||||
%tmp6 = load i32* %tmp5
|
||||
store i32 %tmp6, i32* @dst
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue