forked from OSchip/llvm-project
Re-land "[MS] Overhaul how clang passes overaligned args on x86_32"
This brings back2af74e27ed
and revertseaabaf7e04
. The changes were correct, the code that was broken contained an ODR violation that assumed that these types are passed equivalently: struct alignas(uint64_t) Wrapper { uint64_t P }; void f(uint64_t p); void f(Wrapper p); MSVC does not pass them the same way, and so clang-cl should not pass them the same way either.
This commit is contained in:
parent
413307d456
commit
2c6a3896ab
|
@ -88,6 +88,7 @@ private:
|
|||
Kind TheKind;
|
||||
bool PaddingInReg : 1;
|
||||
bool InAllocaSRet : 1; // isInAlloca()
|
||||
bool InAllocaIndirect : 1;// isInAlloca()
|
||||
bool IndirectByVal : 1; // isIndirect()
|
||||
bool IndirectRealign : 1; // isIndirect()
|
||||
bool SRetAfterThis : 1; // isIndirect()
|
||||
|
@ -110,8 +111,8 @@ private:
|
|||
|
||||
public:
|
||||
ABIArgInfo(Kind K = Direct)
|
||||
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
|
||||
TheKind(K), PaddingInReg(false), InAllocaSRet(false),
|
||||
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
|
||||
PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
|
||||
IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
|
||||
InReg(false), CanBeFlattened(false), SignExt(false) {}
|
||||
|
||||
|
@ -185,9 +186,10 @@ public:
|
|||
AI.setInReg(true);
|
||||
return AI;
|
||||
}
|
||||
static ABIArgInfo getInAlloca(unsigned FieldIndex) {
|
||||
static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) {
|
||||
auto AI = ABIArgInfo(InAlloca);
|
||||
AI.setInAllocaFieldIndex(FieldIndex);
|
||||
AI.setInAllocaIndirect(Indirect);
|
||||
return AI;
|
||||
}
|
||||
static ABIArgInfo getExpand() {
|
||||
|
@ -380,6 +382,15 @@ public:
|
|||
AllocaFieldIndex = FieldIndex;
|
||||
}
|
||||
|
||||
unsigned getInAllocaIndirect() const {
|
||||
assert(isInAlloca() && "Invalid kind!");
|
||||
return InAllocaIndirect;
|
||||
}
|
||||
void setInAllocaIndirect(bool Indirect) {
|
||||
assert(isInAlloca() && "Invalid kind!");
|
||||
InAllocaIndirect = Indirect;
|
||||
}
|
||||
|
||||
/// Return true if this field of an inalloca struct should be returned
|
||||
/// to implement a struct return calling convention.
|
||||
bool getInAllocaSRet() const {
|
||||
|
|
|
@ -2370,6 +2370,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
|||
auto FieldIndex = ArgI.getInAllocaFieldIndex();
|
||||
Address V =
|
||||
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
|
||||
if (ArgI.getInAllocaIndirect())
|
||||
V = Address(Builder.CreateLoad(V),
|
||||
getContext().getTypeAlignInChars(Ty));
|
||||
ArgVals.push_back(ParamValue::forIndirect(V));
|
||||
break;
|
||||
}
|
||||
|
@ -4091,18 +4094,39 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
assert(NumIRArgs == 0);
|
||||
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
|
||||
if (I->isAggregate()) {
|
||||
// Replace the placeholder with the appropriate argument slot GEP.
|
||||
Address Addr = I->hasLValue()
|
||||
? I->getKnownLValue().getAddress(*this)
|
||||
: I->getKnownRValue().getAggregateAddress();
|
||||
llvm::Instruction *Placeholder =
|
||||
cast<llvm::Instruction>(Addr.getPointer());
|
||||
CGBuilderTy::InsertPoint IP = Builder.saveIP();
|
||||
Builder.SetInsertPoint(Placeholder);
|
||||
Addr =
|
||||
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
|
||||
Builder.restoreIP(IP);
|
||||
|
||||
if (!ArgInfo.getInAllocaIndirect()) {
|
||||
// Replace the placeholder with the appropriate argument slot GEP.
|
||||
CGBuilderTy::InsertPoint IP = Builder.saveIP();
|
||||
Builder.SetInsertPoint(Placeholder);
|
||||
Addr = Builder.CreateStructGEP(ArgMemory,
|
||||
ArgInfo.getInAllocaFieldIndex());
|
||||
Builder.restoreIP(IP);
|
||||
} else {
|
||||
// For indirect things such as overaligned structs, replace the
|
||||
// placeholder with a regular aggregate temporary alloca. Store the
|
||||
// address of this alloca into the struct.
|
||||
Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
|
||||
Address ArgSlot = Builder.CreateStructGEP(
|
||||
ArgMemory, ArgInfo.getInAllocaFieldIndex());
|
||||
Builder.CreateStore(Addr.getPointer(), ArgSlot);
|
||||
}
|
||||
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
|
||||
} else if (ArgInfo.getInAllocaIndirect()) {
|
||||
// Make a temporary alloca and store the address of it into the argument
|
||||
// struct.
|
||||
Address Addr = CreateMemTempWithoutCast(
|
||||
I->Ty, getContext().getTypeAlignInChars(I->Ty),
|
||||
"indirect-arg-temp");
|
||||
I->copyInto(*this, Addr);
|
||||
Address ArgSlot =
|
||||
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
|
||||
Builder.CreateStore(Addr.getPointer(), ArgSlot);
|
||||
} else {
|
||||
// Store the RValue into the argument struct.
|
||||
Address Addr =
|
||||
|
|
|
@ -1702,6 +1702,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
|
|||
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
|
||||
|
||||
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||
TypeInfo TI = getContext().getTypeInfo(Ty);
|
||||
|
||||
// Check with the C++ ABI first.
|
||||
const RecordType *RT = Ty->getAs<RecordType>();
|
||||
|
@ -1751,7 +1752,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
|
|||
bool NeedsPadding = false;
|
||||
bool InReg;
|
||||
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
|
||||
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
|
||||
unsigned SizeInRegs = (TI.Width + 31) / 32;
|
||||
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
|
||||
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
|
||||
if (InReg)
|
||||
|
@ -1761,14 +1762,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
|
|||
}
|
||||
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
|
||||
|
||||
// Pass over-aligned aggregates on Windows indirectly. This behavior was
|
||||
// added in MSVC 2015.
|
||||
if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
|
||||
return getIndirectResult(Ty, /*ByVal=*/false, State);
|
||||
|
||||
// Expand small (<= 128-bit) record types when we know that the stack layout
|
||||
// of those arguments will match the struct. This is important because the
|
||||
// LLVM backend isn't smart enough to remove byval, which inhibits many
|
||||
// optimizations.
|
||||
// Don't do this for the MCU if there are still free integer registers
|
||||
// (see X86_64 ABI for full explanation).
|
||||
if (getContext().getTypeSize(Ty) <= 4 * 32 &&
|
||||
(!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
|
||||
if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
|
||||
canExpandIndirectArgument(Ty))
|
||||
return ABIArgInfo::getExpandWithPadding(
|
||||
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
|
||||
|
||||
|
@ -1776,14 +1782,24 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
|
|||
}
|
||||
|
||||
if (const VectorType *VT = Ty->getAs<VectorType>()) {
|
||||
// On Windows, vectors are passed directly if registers are available, or
|
||||
// indirectly if not. This avoids the need to align argument memory. Pass
|
||||
// user-defined vector types larger than 512 bits indirectly for simplicity.
|
||||
if (IsWin32StructABI) {
|
||||
if (TI.Width <= 512 && State.FreeSSERegs > 0) {
|
||||
--State.FreeSSERegs;
|
||||
return ABIArgInfo::getDirectInReg();
|
||||
}
|
||||
return getIndirectResult(Ty, /*ByVal=*/false, State);
|
||||
}
|
||||
|
||||
// On Darwin, some vectors are passed in memory, we handle this by passing
|
||||
// it as an i8/i16/i32/i64.
|
||||
if (IsDarwinVectorABI) {
|
||||
uint64_t Size = getContext().getTypeSize(Ty);
|
||||
if ((Size == 8 || Size == 16 || Size == 32) ||
|
||||
(Size == 64 && VT->getNumElements() == 1))
|
||||
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
|
||||
Size));
|
||||
if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
|
||||
(TI.Width == 64 && VT->getNumElements() == 1))
|
||||
return ABIArgInfo::getDirect(
|
||||
llvm::IntegerType::get(getVMContext(), TI.Width));
|
||||
}
|
||||
|
||||
if (IsX86_MMXType(CGT.ConvertType(Ty)))
|
||||
|
@ -1813,9 +1829,10 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
|||
CCState State(FI);
|
||||
if (IsMCUABI)
|
||||
State.FreeRegs = 3;
|
||||
else if (State.CC == llvm::CallingConv::X86_FastCall)
|
||||
else if (State.CC == llvm::CallingConv::X86_FastCall) {
|
||||
State.FreeRegs = 2;
|
||||
else if (State.CC == llvm::CallingConv::X86_VectorCall) {
|
||||
State.FreeSSERegs = 3;
|
||||
} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
|
||||
State.FreeRegs = 2;
|
||||
State.FreeSSERegs = 6;
|
||||
} else if (FI.getHasRegParm())
|
||||
|
@ -1823,6 +1840,11 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
|||
else if (State.CC == llvm::CallingConv::X86_RegCall) {
|
||||
State.FreeRegs = 5;
|
||||
State.FreeSSERegs = 8;
|
||||
} else if (IsWin32StructABI) {
|
||||
// Since MSVC 2015, the first three SSE vectors have been passed in
|
||||
// registers. The rest are passed indirectly.
|
||||
State.FreeRegs = DefaultNumRegisterParameters;
|
||||
State.FreeSSERegs = 3;
|
||||
} else
|
||||
State.FreeRegs = DefaultNumRegisterParameters;
|
||||
|
||||
|
@ -1869,16 +1891,25 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
|
|||
CharUnits &StackOffset, ABIArgInfo &Info,
|
||||
QualType Type) const {
|
||||
// Arguments are always 4-byte-aligned.
|
||||
CharUnits FieldAlign = CharUnits::fromQuantity(4);
|
||||
CharUnits WordSize = CharUnits::fromQuantity(4);
|
||||
assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
|
||||
|
||||
assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
|
||||
Info = ABIArgInfo::getInAlloca(FrameFields.size());
|
||||
FrameFields.push_back(CGT.ConvertTypeForMem(Type));
|
||||
StackOffset += getContext().getTypeSizeInChars(Type);
|
||||
// sret pointers and indirect things will require an extra pointer
|
||||
// indirection, unless they are byval. Most things are byval, and will not
|
||||
// require this indirection.
|
||||
bool IsIndirect = false;
|
||||
if (Info.isIndirect() && !Info.getIndirectByVal())
|
||||
IsIndirect = true;
|
||||
Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
|
||||
llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
|
||||
if (IsIndirect)
|
||||
LLTy = LLTy->getPointerTo(0);
|
||||
FrameFields.push_back(LLTy);
|
||||
StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
|
||||
|
||||
// Insert padding bytes to respect alignment.
|
||||
CharUnits FieldEnd = StackOffset;
|
||||
StackOffset = FieldEnd.alignTo(FieldAlign);
|
||||
StackOffset = FieldEnd.alignTo(WordSize);
|
||||
if (StackOffset != FieldEnd) {
|
||||
CharUnits NumBytes = StackOffset - FieldEnd;
|
||||
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
|
||||
|
@ -1892,16 +1923,12 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
|
|||
switch (Info.getKind()) {
|
||||
case ABIArgInfo::InAlloca:
|
||||
return true;
|
||||
case ABIArgInfo::Indirect:
|
||||
assert(Info.getIndirectByVal());
|
||||
return true;
|
||||
case ABIArgInfo::Ignore:
|
||||
return false;
|
||||
case ABIArgInfo::Indirect:
|
||||
case ABIArgInfo::Direct:
|
||||
case ABIArgInfo::Extend:
|
||||
if (Info.getInReg())
|
||||
return false;
|
||||
return true;
|
||||
return !Info.getInReg();
|
||||
case ABIArgInfo::Expand:
|
||||
case ABIArgInfo::CoerceAndExpand:
|
||||
// These are aggregate types which are never passed in registers when
|
||||
|
@ -1935,8 +1962,7 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
|
|||
|
||||
// Put the sret parameter into the inalloca struct if it's in memory.
|
||||
if (Ret.isIndirect() && !Ret.getInReg()) {
|
||||
CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
|
||||
addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
|
||||
addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
|
||||
// On Windows, the hidden sret parameter is always returned in eax.
|
||||
Ret.setInAllocaSRet(IsWin32StructABI);
|
||||
}
|
||||
|
|
|
@ -46,3 +46,47 @@ struct s6 {
|
|||
struct s6 f6_1(void) { while (1) {} }
|
||||
void f6_2(struct s6 a0) {}
|
||||
|
||||
|
||||
// MSVC passes up to three vectors in registers, and the rest indirectly. We
|
||||
// (arbitrarily) pass oversized vectors indirectly, since that is the safest way
|
||||
// to do it.
|
||||
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32)));
|
||||
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
|
||||
typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128)));
|
||||
|
||||
__m128 gv128;
|
||||
__m256 gv256;
|
||||
__m512 gv512;
|
||||
__m1024 gv1024;
|
||||
|
||||
void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
|
||||
gv128 = x + y + z + w + q;
|
||||
}
|
||||
void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) {
|
||||
gv256 = x + y + z + w + q;
|
||||
}
|
||||
void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) {
|
||||
gv512 = x + y + z + w + q;
|
||||
}
|
||||
void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) {
|
||||
gv1024 = x + y + z + w + q;
|
||||
}
|
||||
// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1)
|
||||
// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1)
|
||||
// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1)
|
||||
// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4)
|
||||
|
||||
void pass_vec_128() {
|
||||
__m128 z = {0};
|
||||
receive_vec_128(z, z, z, z, z);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local void @pass_vec_128()
|
||||
// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}, <4 x float>* %{{[^,)]*}})
|
||||
|
||||
|
||||
void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) {
|
||||
gv128 = x + y + z + w + q;
|
||||
}
|
||||
// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01@fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1)
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
// RUN: %clang_cc1 -fms-extensions -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
// PR44395
|
||||
// MSVC passes overaligned types indirectly since MSVC 2015. Make sure that
|
||||
// works with inalloca.
|
||||
|
||||
// FIXME: Pass non-trivial *and* overaligned types indirectly. Right now the C++
|
||||
// ABI rules say to use inalloca, and they take precedence, so it's not easy to
|
||||
// implement this.
|
||||
|
||||
|
||||
struct NonTrivial {
|
||||
NonTrivial();
|
||||
NonTrivial(const NonTrivial &o);
|
||||
int x;
|
||||
};
|
||||
|
||||
struct __declspec(align(64)) OverAligned {
|
||||
OverAligned();
|
||||
int buf[16];
|
||||
};
|
||||
|
||||
extern int gvi32;
|
||||
|
||||
int receive_inalloca_overaligned(NonTrivial nt, OverAligned o) {
|
||||
return nt.x + o.buf[0];
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local i32 @"?receive_inalloca_overaligned@@Y{{.*}}"
|
||||
// CHECK-SAME: (<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %0)
|
||||
|
||||
int pass_inalloca_overaligned() {
|
||||
gvi32 = receive_inalloca_overaligned(NonTrivial(), OverAligned());
|
||||
return gvi32;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local i32 @"?pass_inalloca_overaligned@@Y{{.*}}"
|
||||
// CHECK: [[TMP:%[^ ]*]] = alloca %struct.OverAligned, align 64
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: alloca inalloca <{ %struct.NonTrivial, %struct.OverAligned* }>
|
||||
|
||||
// Construct OverAligned into TMP.
|
||||
// CHECK: call x86_thiscallcc %struct.OverAligned* @"??0OverAligned@@QAE@XZ"(%struct.OverAligned* [[TMP]])
|
||||
|
||||
// Construct NonTrivial into the GEP.
|
||||
// CHECK: [[GEP:%[^ ]*]] = getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 0
|
||||
// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* [[GEP]])
|
||||
|
||||
// Store the address of an OverAligned temporary into the struct.
|
||||
// CHECK: getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 1
|
||||
// CHECK: store %struct.OverAligned* [[TMP]], %struct.OverAligned** %{{.*}}, align 4
|
||||
// CHECK: call i32 @"?receive_inalloca_overaligned@@Y{{.*}}"(<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %argmem)
|
|
@ -0,0 +1,79 @@
|
|||
// RUN: %clang_cc1 -w -triple i686-pc-win32 -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
// PR44395
|
||||
// MSVC passes up to three vectors in registers, and the rest indirectly. Check
|
||||
// that both are compatible with an inalloca prototype.
|
||||
|
||||
struct NonTrivial {
|
||||
NonTrivial();
|
||||
NonTrivial(const NonTrivial &o);
|
||||
unsigned handle;
|
||||
};
|
||||
|
||||
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
__m128 gv128;
|
||||
|
||||
// nt, w, and q will be in the inalloca pack.
|
||||
void receive_vec_128(NonTrivial nt, __m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
|
||||
gv128 = x + y + z + w + q;
|
||||
}
|
||||
// CHECK-LABEL: define dso_local void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111@Z"
|
||||
// CHECK-SAME: (<4 x float> inreg %x,
|
||||
// CHECK-SAME: <4 x float> inreg %y,
|
||||
// CHECK-SAME: <4 x float> inreg %z,
|
||||
// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %0)
|
||||
|
||||
void pass_vec_128() {
|
||||
__m128 z = {0};
|
||||
receive_vec_128(NonTrivial(), z, z, z, z, z);
|
||||
}
|
||||
// CHECK-LABEL: define dso_local void @"?pass_vec_128@@YAXXZ"()
|
||||
// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 0
|
||||
// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE@XZ"(%struct.NonTrivial* %{{.*}})
|
||||
|
||||
// Store q, store temp alloca.
|
||||
// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16
|
||||
// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 1
|
||||
// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4
|
||||
|
||||
// Store w, store temp alloca.
|
||||
// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16
|
||||
// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 2
|
||||
// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4
|
||||
|
||||
// CHECK: call void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111@Z"
|
||||
// CHECK-SAME: (<4 x float> inreg %{{[^,]*}},
|
||||
// CHECK-SAME: <4 x float> inreg %{{[^,]*}},
|
||||
// CHECK-SAME: <4 x float> inreg %{{[^,]*}},
|
||||
// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %{{[^,]*}})
|
||||
|
||||
// w will be passed indirectly by register, and q will be passed indirectly, but
|
||||
// the pointer will be in memory.
|
||||
void __fastcall fastcall_receive_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q, NonTrivial nt) {
|
||||
gv128 = x + y + z + w + q;
|
||||
}
|
||||
// CHECK-LABEL: define dso_local x86_fastcallcc void @"?fastcall_receive_vec@@Y{{[^"]*}}"
|
||||
// CHECK-SAME: (<4 x float> inreg %x,
|
||||
// CHECK-SAME: <4 x float> inreg %y,
|
||||
// CHECK-SAME: <4 x float> inreg %z,
|
||||
// CHECK-SAME: <4 x float>* inreg %0,
|
||||
// CHECK-SAME: i32 inreg %edx,
|
||||
// CHECK-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)
|
||||
|
||||
|
||||
void __vectorcall vectorcall_receive_vec(double xmm0, double xmm1, double xmm2,
|
||||
__m128 x, __m128 y, __m128 z,
|
||||
__m128 w, int edx, __m128 q, NonTrivial nt) {
|
||||
gv128 = x + y + z + w + q;
|
||||
}
|
||||
// FIXME: Enable these checks, clang generates wrong IR.
|
||||
// CHECK-LABEL: define dso_local x86_vectorcallcc void @"?vectorcall_receive_vec@@Y{{[^"]*}}"
|
||||
// CHECKX-SAME: (double inreg %xmm0,
|
||||
// CHECKX-SAME: double inreg %xmm1,
|
||||
// CHECKX-SAME: double inreg %xmm2,
|
||||
// CHECKX-SAME: <4 x float> inreg %x,
|
||||
// CHECKX-SAME: <4 x float> inreg %y,
|
||||
// CHECKX-SAME: <4 x float> inreg %z,
|
||||
// CHECKX-SAME: <4 x float>* inreg %0,
|
||||
// CHECKX-SAME: i32 inreg %edx,
|
||||
// CHECKX-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)
|
Loading…
Reference in New Issue