diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h index 588c96afe402..2a41ab9eece7 100644 --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -88,7 +88,6 @@ private: Kind TheKind; bool PaddingInReg : 1; bool InAllocaSRet : 1; // isInAlloca() - bool InAllocaIndirect : 1;// isInAlloca() bool IndirectByVal : 1; // isIndirect() bool IndirectRealign : 1; // isIndirect() bool SRetAfterThis : 1; // isIndirect() @@ -111,8 +110,8 @@ private: public: ABIArgInfo(Kind K = Direct) - : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K), - PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false), + : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), + TheKind(K), PaddingInReg(false), InAllocaSRet(false), IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false), InReg(false), CanBeFlattened(false), SignExt(false) {} @@ -186,10 +185,9 @@ public: AI.setInReg(true); return AI; } - static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) { + static ABIArgInfo getInAlloca(unsigned FieldIndex) { auto AI = ABIArgInfo(InAlloca); AI.setInAllocaFieldIndex(FieldIndex); - AI.setInAllocaIndirect(Indirect); return AI; } static ABIArgInfo getExpand() { @@ -382,15 +380,6 @@ public: AllocaFieldIndex = FieldIndex; } - unsigned getInAllocaIndirect() const { - assert(isInAlloca() && "Invalid kind!"); - return InAllocaIndirect; - } - void setInAllocaIndirect(bool Indirect) { - assert(isInAlloca() && "Invalid kind!"); - InAllocaIndirect = Indirect; - } - /// Return true if this field of an inalloca struct should be returned /// to implement a struct return calling convention. bool getInAllocaSRet() const { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 5b03f37f4498..3a50e2b103f6 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2339,9 +2339,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto FieldIndex = ArgI.getInAllocaFieldIndex(); Address V = Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName()); - if (ArgI.getInAllocaIndirect()) - V = Address(Builder.CreateLoad(V), - getContext().getTypeAlignInChars(Ty)); ArgVals.push_back(ParamValue::forIndirect(V)); break; } @@ -4041,39 +4038,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(NumIRArgs == 0); assert(getTarget().getTriple().getArch() == llvm::Triple::x86); if (I->isAggregate()) { + // Replace the placeholder with the appropriate argument slot GEP. Address Addr = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Instruction *Placeholder = cast(Addr.getPointer()); - - if (!ArgInfo.getInAllocaIndirect()) { - // Replace the placeholder with the appropriate argument slot GEP. - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(Placeholder); - Addr = Builder.CreateStructGEP(ArgMemory, - ArgInfo.getInAllocaFieldIndex()); - Builder.restoreIP(IP); - } else { - // For indirect things such as overaligned structs, replace the - // placeholder with a regular aggregate temporary alloca. Store the - // address of this alloca into the struct. - Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp"); - Address ArgSlot = Builder.CreateStructGEP( - ArgMemory, ArgInfo.getInAllocaFieldIndex()); - Builder.CreateStore(Addr.getPointer(), ArgSlot); - } - deferPlaceholderReplacement(Placeholder, Addr.getPointer()); - } else if (ArgInfo.getInAllocaIndirect()) { - // Make a temporary alloca and store the address of it into the argument - // struct. - Address Addr = CreateMemTempWithoutCast( - I->Ty, getContext().getTypeAlignInChars(I->Ty), - "indirect-arg-temp"); - I->copyInto(*this, Addr); - Address ArgSlot = + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(Placeholder); + Addr = Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); - Builder.CreateStore(Addr.getPointer(), ArgSlot); + Builder.restoreIP(IP); + deferPlaceholderReplacement(Placeholder, Addr.getPointer()); } else { // Store the RValue into the argument struct. Address Addr = diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 08ef8ff64976..c803785435ff 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -1676,7 +1676,6 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; Ty = useFirstFieldIfTransparentUnion(Ty); - TypeInfo TI = getContext().getTypeInfo(Ty); // Check with the C++ ABI first. const RecordType *RT = Ty->getAs(); @@ -1726,7 +1725,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, bool NeedsPadding = false; bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { - unsigned SizeInRegs = (TI.Width + 31) / 32; + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); if (InReg) @@ -1736,19 +1735,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; - // Pass over-aligned aggregates on Windows indirectly. This behavior was - // added in MSVC 2015. - if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32) - return getIndirectResult(Ty, /*ByVal=*/false, State); - // Expand small (<= 128-bit) record types when we know that the stack layout // of those arguments will match the struct. This is important because the // LLVM backend isn't smart enough to remove byval, which inhibits many // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && - canExpandIndirectArgument(Ty)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( IsFastCall || IsVectorCall || IsRegCall, PaddingType); @@ -1756,24 +1750,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (const VectorType *VT = Ty->getAs()) { - // On Windows, vectors are passed directly if registers are available, or - // indirectly if not. This avoids the need to align argument memory. Pass - // user-defined vector types larger than 512 bits indirectly for simplicity. - if (IsWin32StructABI) { - if (TI.Width <= 512 && State.FreeSSERegs > 0) { - --State.FreeSSERegs; - return ABIArgInfo::getDirectInReg(); - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - // On Darwin, some vectors are passed in memory, we handle this by passing // it as an i8/i16/i32/i64. if (IsDarwinVectorABI) { - if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || - (TI.Width == 64 && VT->getNumElements() == 1)) - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), TI.Width)); + uint64_t Size = getContext().getTypeSize(Ty); + if ((Size == 8 || Size == 16 || Size == 32) || + (Size == 64 && VT->getNumElements() == 1)) + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), + Size)); } if (IsX86_MMXType(CGT.ConvertType(Ty))) @@ -1803,10 +1787,9 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI); if (IsMCUABI) State.FreeRegs = 3; - else if (State.CC == llvm::CallingConv::X86_FastCall) { + else if (State.CC == llvm::CallingConv::X86_FastCall) State.FreeRegs = 2; - State.FreeSSERegs = 3; - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { + else if (State.CC == llvm::CallingConv::X86_VectorCall) { State.FreeRegs = 2; State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) @@ -1814,11 +1797,6 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { else if (State.CC == llvm::CallingConv::X86_RegCall) { State.FreeRegs = 5; State.FreeSSERegs = 8; - } else if (IsWin32StructABI) { - // Since MSVC 2015, the first three SSE vectors have been passed in - // registers. The rest are passed indirectly. - State.FreeRegs = DefaultNumRegisterParameters; - State.FreeSSERegs = 3; } else State.FreeRegs = DefaultNumRegisterParameters; @@ -1865,25 +1843,16 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const { // Arguments are always 4-byte-aligned. - CharUnits WordSize = CharUnits::fromQuantity(4); - assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); + CharUnits FieldAlign = CharUnits::fromQuantity(4); - // sret pointers and indirect things will require an extra pointer - // indirection, unless they are byval. Most things are byval, and will not - // require this indirection. - bool IsIndirect = false; - if (Info.isIndirect() && !Info.getIndirectByVal()) - IsIndirect = true; - Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); - llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); - if (IsIndirect) - LLTy = LLTy->getPointerTo(0); - FrameFields.push_back(LLTy); - StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); + assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct"); + Info = ABIArgInfo::getInAlloca(FrameFields.size()); + FrameFields.push_back(CGT.ConvertTypeForMem(Type)); + StackOffset += getContext().getTypeSizeInChars(Type); // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.alignTo(WordSize); + StackOffset = FieldEnd.alignTo(FieldAlign); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); @@ -1897,12 +1866,16 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { switch (Info.getKind()) { case ABIArgInfo::InAlloca: return true; + case ABIArgInfo::Indirect: + assert(Info.getIndirectByVal()); + return true; case ABIArgInfo::Ignore: return false; - case ABIArgInfo::Indirect: case ABIArgInfo::Direct: case ABIArgInfo::Extend: - return !Info.getInReg(); + if (Info.getInReg()) + return false; + return true; case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: // These are aggregate types which are never passed in registers when @@ -1936,7 +1909,8 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { // Put the sret parameter into the inalloca struct if it's in memory. if (Ret.isIndirect() && !Ret.getInReg()) { - addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); + CanQualType PtrTy = getContext().getPointerType(FI.getReturnType()); + addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy); // On Windows, the hidden sret parameter is always returned in eax. Ret.setInAllocaSRet(IsWin32StructABI); } diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/x86_32-arguments-win32.c index 33a6216a62bd..65e25f32c250 100644 --- a/clang/test/CodeGen/x86_32-arguments-win32.c +++ b/clang/test/CodeGen/x86_32-arguments-win32.c @@ -46,47 +46,3 @@ struct s6 { struct s6 f6_1(void) { while (1) {} } void f6_2(struct s6 a0) {} - -// MSVC passes up to three vectors in registers, and the rest indirectly. We -// (arbitrarily) pass oversized vectors indirectly, since that is the safest way -// to do it. -typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); -typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32))); -typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); -typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128))); - -__m128 gv128; -__m256 gv256; -__m512 gv512; -__m1024 gv1024; - -void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) { - gv128 = x + y + z + w + q; -} -void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) { - gv256 = x + y + z + w + q; -} -void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) { - gv512 = x + y + z + w + q; -} -void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) { - gv1024 = x + y + z + w + q; -} -// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1) -// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1) -// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1) -// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4) - -void pass_vec_128() { - __m128 z = {0}; - receive_vec_128(z, z, z, z, z); -} - -// CHECK-LABEL: define dso_local void @pass_vec_128() -// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}) - - -void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) { - gv128 = x + y + z + w + q; -} -// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01@fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1) diff --git a/clang/test/CodeGenCXX/inalloca-overaligned.cpp b/clang/test/CodeGenCXX/inalloca-overaligned.cpp deleted file mode 100644 index 910f0d92895e..000000000000 --- a/clang/test/CodeGenCXX/inalloca-overaligned.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %clang_cc1 -fms-extensions -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s - -// PR44395 -// MSVC passes overaligned types indirectly since MSVC 2015. Make sure that -// works with inalloca. - -// FIXME: Pass non-trivial *and* overaligned types indirectly. Right now the C++ -// ABI rules say to use inalloca, and they take precedence, so it's not easy to -// implement this. - - -struct NonTrivial { - NonTrivial(); - NonTrivial(const NonTrivial &o); - int x; -}; - -struct __declspec(align(64)) OverAligned { - OverAligned(); - int buf[16]; -}; - -extern int gvi32; - -int receive_inalloca_overaligned(NonTrivial nt, OverAligned o) { - return nt.x + o.buf[0]; -} - -// CHECK-LABEL: define dso_local i32 @"?receive_inalloca_overaligned@@Y{{.*}}" -// CHECK-SAME: (<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %0) - -int pass_inalloca_overaligned() { - gvi32 = receive_inalloca_overaligned(NonTrivial(), OverAligned()); - return gvi32; -} - -// CHECK-LABEL: define dso_local i32 @"?pass_inalloca_overaligned@@Y{{.*}}" -// CHECK: [[TMP:%[^ ]*]] = alloca %struct.OverAligned, align 64 -// CHECK: call i8* @llvm.stacksave() -// CHECK: alloca inalloca <{ %struct.NonTrivial, %struct.OverAligned* }> - -// Construct OverAligned into TMP. -// CHECK: call x86_thiscallcc %struct.OverAligned* @"??0OverAligned@@QAE@XZ"(%struct.OverAligned* [[TMP]]) - -// Construct NonTrivial into the GEP. -// CHECK: [[GEP:%[^ ]*]] = getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 0 -// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* [[GEP]]) - -// Store the address of an OverAligned temporary into the struct. -// CHECK: getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 1 -// CHECK: store %struct.OverAligned* [[TMP]], %struct.OverAligned** %{{.*}}, align 4 -// CHECK: call i32 @"?receive_inalloca_overaligned@@Y{{.*}}"(<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %argmem) diff --git a/clang/test/CodeGenCXX/inalloca-vector.cpp b/clang/test/CodeGenCXX/inalloca-vector.cpp deleted file mode 100644 index f3d7f81e9443..000000000000 --- a/clang/test/CodeGenCXX/inalloca-vector.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// RUN: %clang_cc1 -w -triple i686-pc-win32 -emit-llvm -o - %s | FileCheck %s - -// PR44395 -// MSVC passes up to three vectors in registers, and the rest indirectly. Check -// that both are compatible with an inalloca prototype. - -struct NonTrivial { - NonTrivial(); - NonTrivial(const NonTrivial &o); - unsigned handle; -}; - -typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); -__m128 gv128; - -// nt, w, and q will be in the inalloca pack. -void receive_vec_128(NonTrivial nt, __m128 x, __m128 y, __m128 z, __m128 w, __m128 q) { - gv128 = x + y + z + w + q; -} -// CHECK-LABEL: define dso_local void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111@Z" -// CHECK-SAME: (<4 x float> inreg %x, -// CHECK-SAME: <4 x float> inreg %y, -// CHECK-SAME: <4 x float> inreg %z, -// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %0) - -void pass_vec_128() { - __m128 z = {0}; - receive_vec_128(NonTrivial(), z, z, z, z, z); -} -// CHECK-LABEL: define dso_local void @"?pass_vec_128@@YAXXZ"() -// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 0 -// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* %{{.*}}) - -// Store q, store temp alloca. -// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16 -// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 1 -// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4 - -// Store w, store temp alloca. -// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16 -// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 2 -// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4 - -// CHECK: call void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111@Z" -// CHECK-SAME: (<4 x float> inreg %{{[^,]*}}, -// CHECK-SAME: <4 x float> inreg %{{[^,]*}}, -// CHECK-SAME: <4 x float> inreg %{{[^,]*}}, -// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %{{[^,]*}}) - -// w will be passed indirectly by register, and q will be passed indirectly, but -// the pointer will be in memory. -void __fastcall fastcall_receive_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q, NonTrivial nt) { - gv128 = x + y + z + w + q; -} -// CHECK-LABEL: define dso_local x86_fastcallcc void @"?fastcall_receive_vec@@Y{{[^"]*}}" -// CHECK-SAME: (<4 x float> inreg %x, -// CHECK-SAME: <4 x float> inreg %y, -// CHECK-SAME: <4 x float> inreg %z, -// CHECK-SAME: <4 x float>* inreg %0, -// CHECK-SAME: i32 inreg %edx, -// CHECK-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1) - - -void __vectorcall vectorcall_receive_vec(double xmm0, double xmm1, double xmm2, - __m128 x, __m128 y, __m128 z, - __m128 w, int edx, __m128 q, NonTrivial nt) { - gv128 = x + y + z + w + q; -} -// FIXME: Enable these checks, clang generates wrong IR. -// CHECK-LABEL: define dso_local x86_vectorcallcc void @"?vectorcall_receive_vec@@Y{{[^"]*}}" -// CHECKX-SAME: (double inreg %xmm0, -// CHECKX-SAME: double inreg %xmm1, -// CHECKX-SAME: double inreg %xmm2, -// CHECKX-SAME: <4 x float> inreg %x, -// CHECKX-SAME: <4 x float> inreg %y, -// CHECKX-SAME: <4 x float> inreg %z, -// CHECKX-SAME: <4 x float>* inreg %0, -// CHECKX-SAME: i32 inreg %edx, -// CHECKX-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)