MS ABI: Use musttail for vtable thunks that pass arguments by value

This moves some memptr specific code into the generic thunk emission
codepath.

Fixes PR20053.

Reviewers: majnemer

Differential Revision: http://reviews.llvm.org/D4613

llvm-svn: 214004
This commit is contained in:
Reid Kleckner 2014-07-26 01:34:32 +00:00
parent 3f76ac7daa
commit ab2090d107
7 changed files with 205 additions and 58 deletions

View File

@ -2045,19 +2045,8 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
return args.add(RValue::get(Builder.CreateLoad(local)), type);
}
if (isInAllocaArgument(CGM.getCXXABI(), type)) {
AggValueSlot Slot = createPlaceholderSlot(*this, type);
Slot.setExternallyDestructed();
// FIXME: Either emit a copy constructor call, or figure out how to do
// guaranteed tail calls with perfect forwarding in LLVM.
CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk");
EmitNullInitialization(Slot.getAddr(), type);
RValue RV = Slot.asRValue();
args.add(RV, type);
return;
}
assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
"cannot emit delegate call arguments for inalloca arguments!");
args.add(convertTempToRValue(local, type, loc), type);
}

View File

@ -236,6 +236,18 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
*this, LoadCXXThis(), Thunk->This)
: LoadCXXThis();
if (CurFnInfo->usesInAlloca()) {
// We don't handle return adjusting thunks, because they require us to call
// the copy constructor. For now, fall through and pretend the return
// adjustment was empty so we don't crash.
if (Thunk && !Thunk->Return.isEmpty()) {
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
}
EmitMustTailThunk(MD, AdjustedThisPtr, Callee);
return;
}
// Start building CallArgs.
CallArgList CallArgs;
QualType ThisType = MD->getThisType(getContext());
@ -278,8 +290,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());
// Now emit our call.
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD);
llvm::Instruction *CallOrInvoke;
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD, &CallOrInvoke);
// Consider return adjustment if we have ThunkInfo.
if (Thunk && !Thunk->Return.isEmpty())
RV = PerformReturnAdjustment(*this, ResultType, RV, *Thunk);
@ -294,6 +307,62 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
FinishFunction();
}
void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
llvm::Value *AdjustedThisPtr,
llvm::Value *Callee) {
// Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery
// to translate AST arguments into LLVM IR arguments. For thunks, we know
// that the caller prototype more or less matches the callee prototype with
// the exception of 'this'.
SmallVector<llvm::Value *, 8> Args;
for (llvm::Argument &A : CurFn->args())
Args.push_back(&A);
// Set the adjusted 'this' pointer.
const ABIArgInfo &ThisAI = CurFnInfo->arg_begin()->info;
if (ThisAI.isDirect()) {
const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
int ThisArgNo = RetAI.isIndirect() && !RetAI.isSRetAfterThis() ? 1 : 0;
llvm::Type *ThisType = Args[ThisArgNo]->getType();
if (ThisType != AdjustedThisPtr->getType())
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, ThisType);
Args[ThisArgNo] = AdjustedThisPtr;
} else {
assert(ThisAI.isInAlloca() && "this is passed directly or inalloca");
llvm::Value *ThisAddr = GetAddrOfLocalVar(CXXABIThisDecl);
llvm::Type *ThisType =
cast<llvm::PointerType>(ThisAddr->getType())->getElementType();
if (ThisType != AdjustedThisPtr->getType())
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, ThisType);
Builder.CreateStore(AdjustedThisPtr, ThisAddr);
}
// Emit the musttail call manually. Even if the prologue pushed cleanups, we
// don't actually want to run them.
llvm::CallInst *Call = Builder.CreateCall(Callee, Args);
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
// Apply the standard set of call attributes.
unsigned CallingConv;
CodeGen::AttributeListType AttributeList;
CGM.ConstructAttributeList(*CurFnInfo, MD, AttributeList, CallingConv,
/*AttrOnCallSite=*/true);
llvm::AttributeSet Attrs =
llvm::AttributeSet::get(getLLVMContext(), AttributeList);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
if (Call->getType()->isVoidTy())
Builder.CreateRetVoid();
else
Builder.CreateRet(Call);
// Finish the function to maintain CodeGenFunction invariants.
// FIXME: Don't emit unreachable code.
EmitBlock(createBasicBlock());
FinishFunction();
}
void CodeGenFunction::GenerateThunk(llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk) {

View File

@ -1210,6 +1210,10 @@ public:
void EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk);
/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr,
llvm::Value *Callee);
/// GenerateThunk - Generate a thunk for the given method.
void GenerateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk);

View File

@ -1467,31 +1467,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr);
unsigned CallingConv;
CodeGen::AttributeListType AttributeList;
CGM.ConstructAttributeList(FnInfo, MD, AttributeList, CallingConv, true);
llvm::AttributeSet Attrs =
llvm::AttributeSet::get(CGF.getLLVMContext(), AttributeList);
// Do a musttail call with perfect argument forwarding. Any inalloca argument
// will be forwarded in place without any copy.
SmallVector<llvm::Value *, 8> Args;
for (llvm::Argument &A : ThunkFn->args())
Args.push_back(&A);
llvm::CallInst *Call = CGF.Builder.CreateCall(Callee, Args);
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
if (Call->getType()->isVoidTy())
CGF.Builder.CreateRetVoid();
else
CGF.Builder.CreateRet(Call);
// Finish the function to maintain CodeGenFunction invariants.
// FIXME: Don't emit unreachable code.
CGF.EmitBlock(CGF.createBasicBlock());
CGF.FinishFunction();
CGF.EmitCallAndReturnForThunk(Callee, 0);
return ThunkFn;
}

View File

@ -1,4 +1,4 @@
// RUN: not %clang_cc1 %s -fno-rtti -triple=i686-pc-win32 -emit-llvm -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK32 %s
// RUN: %clang_cc1 %s -fno-rtti -triple=i686-pc-win32 -emit-llvm -o - | FileCheck --check-prefix=CHECK32 %s
// RUN: %clang_cc1 %s -fno-rtti -triple=x86_64-pc-win32 -emit-llvm -o - | FileCheck --check-prefix=CHECK64 %s
namespace byval_thunk {
@ -11,15 +11,103 @@ struct Agg {
struct A { virtual void foo(Agg x); };
struct B { virtual void foo(Agg x); };
struct C : A, B { virtual void foo(Agg x); };
C c;
struct C : A, B { C(); virtual void foo(Agg x); };
C::C() {} // force emission
// CHECK32: cannot compile this non-trivial argument copy for thunk yet
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01?foo@C@byval_thunk@@W3AEXUAgg@2@@Z"
// CHECK32: (%"struct.byval_thunk::C"* %this, <{ %"struct.byval_thunk::Agg" }>* inalloca)
// CHECK32: %2 = getelementptr i8* %{{.*}}, i32 -4
// CHECK32: musttail call x86_thiscallcc void @"\01?foo@C@byval_thunk@@UAEXUAgg@2@@Z"
// CHECK32: (%"struct.byval_thunk::C"* %{{.*}}, <{ %"struct.byval_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret void
// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@byval_thunk@@W7EAAXUAgg@2@@Z"
// CHECK64: (%"struct.byval_thunk::C"* %this, %"struct.byval_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@byval_thunk@@UEAAXUAgg@2@@Z"(%"struct.byval_thunk::C"* %{{.*}}, %"struct.byval_thunk::Agg"* %x)
// CHECK64: call void @"\01?foo@C@byval_thunk@@UEAAXUAgg@2@@Z"
// CHECK64: (%"struct.byval_thunk::C"* %{{.*}}, %"struct.byval_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}
namespace stdcall_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};
struct A { virtual void __stdcall foo(Agg x); };
struct B { virtual void __stdcall foo(Agg x); };
struct C : A, B { C(); virtual void __stdcall foo(Agg x); };
C::C() {} // force emission
// CHECK32-LABEL: define linkonce_odr x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@W3AGXUAgg@2@@Z"
// CHECK32: (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* inalloca)
// CHECK32: %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* %0, i32 0, i32 0
// CHECK32: load %"struct.stdcall_thunk::C"** %[[this_slot]]
// CHECK32: getelementptr i8* %{{.*}}, i32 -4
// CHECK32: store %"struct.stdcall_thunk::C"* %{{.*}}, %"struct.stdcall_thunk::C"** %[[this_slot]]
// CHECK32: musttail call x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@UAGXUAgg@2@@Z"
// CHECK32: (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret void
// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@stdcall_thunk@@W7EAAXUAgg@2@@Z"
// CHECK64: (%"struct.stdcall_thunk::C"* %this, %"struct.stdcall_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@stdcall_thunk@@UEAAXUAgg@2@@Z"
// CHECK64: (%"struct.stdcall_thunk::C"* %{{.*}}, %"struct.stdcall_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}
namespace sret_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};
struct A { virtual Agg __cdecl foo(Agg x); };
struct B { virtual Agg __cdecl foo(Agg x); };
struct C : A, B { C(); virtual Agg __cdecl foo(Agg x); };
C::C() {} // force emission
// CHECK32-LABEL: define linkonce_odr %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@W3AA?AUAgg@2@U32@@Z"
// CHECK32: (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* inalloca)
// CHECK32: %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* %0, i32 0, i32 0
// CHECK32: load %"struct.sret_thunk::C"** %[[this_slot]]
// CHECK32: getelementptr i8* %{{.*}}, i32 -4
// CHECK32: store %"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::C"** %[[this_slot]]
// CHECK32: %[[rv:[^ ]*]] = musttail call %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@UAA?AUAgg@2@U32@@Z"
// CHECK32: (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret %"struct.sret_thunk::Agg"* %[[rv]]
// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@sret_thunk@@W7EAA?AUAgg@2@U32@@Z"
// CHECK64: (%"struct.sret_thunk::C"* %this, %"struct.sret_thunk::Agg"* noalias sret %agg.result, %"struct.sret_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@sret_thunk@@UEAA?AUAgg@2@U32@@Z"
// CHECK64: (%"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::Agg"* sret %agg.result, %"struct.sret_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}
#if 0
// FIXME: When we extend LLVM IR to allow forwarding of varargs through musttail
// calls, use this test.
namespace variadic_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};
struct A { virtual void foo(Agg x, ...); };
struct B { virtual void foo(Agg x, ...); };
struct C : A, B { C(); virtual void foo(Agg x, ...); };
C::C() {} // force emission
}
#endif

View File

@ -18,7 +18,7 @@ struct B {
struct C : A, B {
C();
int c;
virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for thunk yet}}
virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for return-adjusting thunk yet}}
};
B::B() {} // force emission
C::C() {} // force emission

View File

@ -18,6 +18,7 @@ struct C {
virtual int bar(int, double);
virtual S baz(int);
virtual S qux(U);
virtual S __fastcall zed(U);
};
namespace {
@ -43,6 +44,9 @@ void f() {
S (C::*ptr5)(U);
ptr5 = &C::qux;
S (__fastcall C::*ptr6)(U);
ptr6 = &C::zed;
// CHECK32-LABEL: define void @"\01?f@@YAXXZ"()
// CHECK32: store i8* bitcast (void (%struct.C*)* @"\01??_9C@@$BA@AE" to i8*), i8** %ptr
@ -64,14 +68,14 @@ void f() {
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_9C@@$BA@AE"(%struct.C* %this) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*)** %{{.*}}, i64 0
// CHECK32: [[CALLEE:%.*]] = load void (%struct.C*)** [[VPTR]]
// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}})
// CHECK32: call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}})
// CHECK32: ret void
// CHECK32: }
//
// CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BA@AA"(%struct.C* %this) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*)** %{{.*}}, i64 0
// CHECK64: [[CALLEE:%.*]] = load void (%struct.C*)** [[VPTR]]
// CHECK64: musttail call void [[CALLEE]](%struct.C* %{{.*}})
// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}})
// CHECK64: ret void
// CHECK64: }
@ -79,14 +83,14 @@ void f() {
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc i32 @"\01??_9C@@$B3AE"(%struct.C* %this, i32, double) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds i32 (%struct.C*, i32, double)** %{{.*}}, i64 1
// CHECK32: [[CALLEE:%.*]] = load i32 (%struct.C*, i32, double)** [[VPTR]]
// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}})
// CHECK32: [[CALL:%.*]] = call x86_thiscallcc i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}})
// CHECK32: ret i32 [[CALL]]
// CHECK32: }
//
// CHECK64-LABEL: define linkonce_odr i32 @"\01??_9C@@$B7AA"(%struct.C* %this, i32, double) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds i32 (%struct.C*, i32, double)** %{{.*}}, i64 1
// CHECK64: [[CALLEE:%.*]] = load i32 (%struct.C*, i32, double)** [[VPTR]]
// CHECK64: [[CALL:%.*]] = musttail call i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}})
// CHECK64: [[CALL:%.*]] = call i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}})
// CHECK64: ret i32 [[CALL]]
// CHECK64: }
@ -94,14 +98,14 @@ void f() {
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_9C@@$B7AE"(%struct.C* %this, %struct.S* noalias sret %agg.result, i32) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, i32)** %{{.*}}, i64 2
// CHECK32: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, i32)** [[VPTR]]
// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}})
// CHECK32: call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}})
// CHECK32: ret void
// CHECK32: }
//
// CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BBA@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, i32) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, i32)** %{{.*}}, i64 2
// CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, i32)** [[VPTR]]
// CHECK64: musttail call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}})
// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}})
// CHECK64: ret void
// CHECK64: }
@ -109,28 +113,45 @@ void f() {
// CHECK32-LABEL: define internal x86_thiscallcc void @"\01??_9D@?A@@$BA@AE"(%"struct.(anonymous namespace)::D"* %this) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%"struct.(anonymous namespace)::D"*)** %{{.*}}, i64 0
// CHECK32: [[CALLEE:%.*]] = load void (%"struct.(anonymous namespace)::D"*)** [[VPTR]]
// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}})
// CHECK32: call x86_thiscallcc void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}})
// CHECK32: ret void
// CHECK32: }
//
// CHECK64-LABEL: define internal void @"\01??_9D@?A@@$BA@AA"(%"struct.(anonymous namespace)::D"* %this) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%"struct.(anonymous namespace)::D"*)** %{{.*}}, i64 0
// CHECK64: [[CALLEE:%.*]] = load void (%"struct.(anonymous namespace)::D"*)** [[VPTR]]
// CHECK64: musttail call void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}})
// CHECK64: call void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}})
// CHECK64: ret void
// CHECK64: }
// Thunk for calling the fourth virtual function in C, taking a struct parameter and returning a struct.
// Thunk for calling the fourth virtual function in C, taking a struct parameter
// and returning a struct.
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc %struct.S* @"\01??_9C@@$BM@AE"(%struct.C* %this, <{ %struct.S*, %struct.U }>* inalloca) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds %struct.S* (%struct.C*, <{ %struct.S*, %struct.U }>*)** %{{.*}}, i64 3
// CHECK32: [[CALLEE:%.*]] = load %struct.S* (%struct.C*, <{ %struct.S*, %struct.U }>*)** [[VPTR]]
// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc %struct.S* [[CALLEE]](%struct.C* %this, <{ %struct.S*, %struct.U }>* inalloca %{{.*}})
// CHECK32: ret %struct.S* [[CALL]]
// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc %struct.S* [[CALLEE]](%struct.C* %{{.*}}, <{ %struct.S*, %struct.U }>* inalloca %{{.*}})
// CHECK32-NEXT: ret %struct.S* [[CALL]]
// CHECK32: }
//
// CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BBI@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.U*) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, %struct.U*)** %{{.*}}, i64 3
// CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, %struct.U*)** [[VPTR]]
// CHECK64: musttail call void [[CALLEE]](%struct.C* %this, %struct.S* sret %agg.result, %struct.U* %{{.*}})
// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, %struct.U* %{{.*}})
// CHECK64: ret void
// CHECK64: }
// Thunk for calling the fifth virtual function in C, taking a struct parameter
// and returning a struct.
// CHECK32-LABEL: define linkonce_odr x86_fastcallcc void @"\01??_9C@@$BBA@AE"(%struct.C* inreg %this, %struct.S* inreg noalias sret %agg.result, <{ %struct.U }>* inalloca) unnamed_addr
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, <{ %struct.U }>*)** %{{.*}}, i64 4
// CHECK32: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, <{ %struct.U }>*)** [[VPTR]]
// CHECK32: musttail call x86_fastcallcc void [[CALLEE]](%struct.C* inreg %{{.*}}, %struct.S* inreg sret %{{.*}}, <{ %struct.U }>* inalloca %{{.*}})
// CHECK32-NEXT: ret void
// CHECK32: }
//
// CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BCA@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.U*) unnamed_addr
// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, %struct.U*)** %{{.*}}, i64 4
// CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, %struct.U*)** [[VPTR]]
// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, %struct.U* %{{.*}})
// CHECK64: ret void
// CHECK64: }