forked from OSchip/llvm-project
Ignore structs that wrap vectors when choosing the IR type; the abstraction shouldn't add a penalty.
Previously, we would compile the example into something like:

  %coerce.dive2 = getelementptr %struct.v4f32wrapper* %retval, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
  %1 = bitcast <4 x float>* %coerce.dive2 to <2 x double>* ; <<2 x double>*> [#uses=1]
  %2 = load <2 x double>* %1, align 1 ; <<2 x double>> [#uses=1]
  ret <2 x double> %2

Now we produce:

  %coerce.dive2 = getelementptr %struct.v4f32wrapper* %retval, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
  %0 = load <4 x float>* %coerce.dive2, align 1 ; <<4 x float>> [#uses=1]
  ret <4 x float> %0

llvm-svn: 109732
This commit is contained in:
parent
4200fe4e50
commit
9fa15c3608
|
@ -1185,9 +1185,20 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
|
|||
/// full vector XMM register. Pick an LLVM IR type that will be passed as a
|
||||
/// vector register.
|
||||
const llvm::Type *X86_64ABIInfo::Get16ByteVectorType(QualType Ty) const {
|
||||
const llvm::Type *IRType = CGT.ConvertTypeRecursive(Ty);
|
||||
|
||||
// Wrapper structs that just contain vectors are passed just like vectors,
|
||||
// strip them off if present.
|
||||
const llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType);
|
||||
while (STy && STy->getNumElements() == 1) {
|
||||
IRType = STy->getElementType(0);
|
||||
STy = dyn_cast<llvm::StructType>(IRType);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// If the preferred type is a 16-byte vector, prefer to pass it.
|
||||
if (const llvm::VectorType *VT =
|
||||
dyn_cast<llvm::VectorType>(CGT.ConvertTypeRecursive(Ty))){
|
||||
if (const llvm::VectorType *VT = dyn_cast<llvm::VectorType>(IRType)){
|
||||
const llvm::Type *EltTy = VT->getElementType();
|
||||
if (VT->getBitWidth() == 128 &&
|
||||
(EltTy->isFloatTy() || EltTy->isDoubleTy() ||
|
||||
|
|
|
@ -168,3 +168,13 @@ struct foo26 f26(struct foo26 *P) {
|
|||
// CHECK: define %struct.foo26 @f26(%struct.foo26* %P)
|
||||
return *P;
|
||||
}
|
||||
|
||||
|
||||
// Wrapper struct whose only member is a single v4f32 value (presumably the
// 4 x float vector typedef declared earlier in this test file -- confirm).
struct v4f32wrapper {
|
||||
v4f32 v;
|
||||
};
|
||||
|
||||
// f27: takes a v4f32wrapper by value and returns it unchanged. The expected
// IR signature below requires the wrapper to be coerced to a bare
// <4 x float> for both the argument and the return value.
struct v4f32wrapper f27(struct v4f32wrapper X) {
|
||||
// CHECK: define <4 x float> @f27(<4 x float> %X.coerce)
|
||||
return X;
|
||||
}
|
Loading…
Reference in New Issue