[X86] Prevent passing vectors of __int128 as <X x i128> in llvm IR

As far as I can tell, gcc passes 256/512-bit vectors of __int128 in memory, and passes a vector of one __int128 in an xmm register. The backend considers <X x i128> an illegal type and will scalarize any arguments with that type. So we need to coerce the argument types in the frontend to match gcc and avoid the illegal type.

I'm restricting this change to Linux and NetBSD based on
how similar ABI changes have been handled in the past.
PS4, FreeBSD, and Darwin are unaffected. I've also added a
new -fclang-abi-compat version to restore the old behavior.

This issue was identified in PR42607. Though even with the types changed, we still seem to be doing some unnecessary stack realignment.

llvm-svn: 371169
This commit is contained in:
Craig Topper 2019-09-06 06:02:13 +00:00
parent 890b551fe7
commit 6c8a34ed9b
5 changed files with 58 additions and 6 deletions

View File

@ -129,7 +129,10 @@ OpenCL C Language Changes in Clang
ABI Changes in Clang
--------------------
- ...
- gcc passes vectors of __int128 in memory on X86-64. Clang historically
broke the vectors into multiple scalars using two 64-bit values for each
element. Clang now matches the gcc behavior on Linux and NetBSD. You can
switch back to the old ABI behavior with the flag: -fclang-abi-compat=9.0.
OpenMP Support in Clang
-----------------------

View File

@ -138,6 +138,12 @@ public:
/// rather than returning the required alignment.
Ver7,
/// Attempt to be ABI-compatible with code generated by Clang 9.0.x
/// (SVN r351319). This causes vectors of __int128 to be split into
/// multiple scalars instead of being passed in memory on x86_64 on Linux
/// and NetBSD.
Ver9,
/// Conform to the underlying platform's C and C++ ABIs as closely
/// as we can.
Latest

View File

@ -2180,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo {
return true;
}
// GCC classifies vectors of __int128 as memory.
bool passInt128VectorsInMem() const {
// Clang <= 9.0 did not do this.
if (getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver9)
return false;
const llvm::Triple &T = getTarget().getTriple();
return T.isOSLinux() || T.isOSNetBSD();
}
X86AVXABILevel AVXLevel;
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
// 64-bit hardware.
@ -2660,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
Hi = Lo;
} else if (Size == 128 ||
(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
QualType ElementType = VT->getElementType();
// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
if (passInt128VectorsInMem() && Size != 128 &&
(ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
return;
// Arguments of 256-bits are split into four eightbyte chunks. The
// least significant one belongs to class SSE and all the others to class
// SSEUP. The original Lo and Hi design considers that types can't be
@ -2902,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
if (Size <= 64 || Size > LargestVector)
return true;
QualType EltTy = VecTy->getElementType();
if (passInt128VectorsInMem() &&
(EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
return true;
}
return false;
@ -2976,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
Ty = QualType(InnerTy, 0);
llvm::Type *IRType = CGT.ConvertType(Ty);
if (isa<llvm::VectorType>(IRType) ||
IRType->getTypeID() == llvm::Type::FP128TyID)
if (isa<llvm::VectorType>(IRType)) {
// Don't pass vXi128 vectors in their native type, the backend can't
// legalize them.
if (passInt128VectorsInMem() &&
IRType->getVectorElementType()->isIntegerTy(128)) {
// Use a vXi64 vector.
uint64_t Size = getContext().getTypeSize(Ty);
return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
Size / 64);
}
return IRType;
}
if (IRType->getTypeID() == llvm::Type::FP128TyID)
return IRType;
// We couldn't find the preferred IR vector type for 'Ty'.
uint64_t Size = getContext().getTypeSize(Ty);
assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
// Return a LLVM IR vector type based on the size of 'Ty'.
return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
Size / 64);

View File

@ -3164,6 +3164,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.setClangABICompat(LangOptions::ClangABI::Ver6);
else if (Major <= 7)
Opts.setClangABICompat(LangOptions::ClangABI::Ver7);
else if (Major <= 9)
Opts.setClangABICompat(LangOptions::ClangABI::Ver9);
} else if (Ver != "latest") {
Diags.Report(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();

View File

@ -3,24 +3,27 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN16,MEM512ALIGN16
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN32
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
typedef unsigned long long v16u64 __attribute__((vector_size(16)));
typedef unsigned __int128 v16u128 __attribute__((vector_size(16)));
// Returns a plus b, where b (a 16-byte vector of unsigned __int128) is first
// cast to v16u64. The FileCheck directives inside the body pin the IR
// function signature expected under each check prefix selected by the RUN
// lines.
// NOTE(review): one check prefix appears on two directives with different
// expected types for the second parameter; this looks like the old and new
// expectation shown side by side -- confirm against the real test file.
v16u64 test_v16u128(v16u64 a, v16u128 b) {
// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CLANG9ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
return a + (v16u64)b;
}
@ -31,7 +34,7 @@ typedef unsigned __int128 v32u128 __attribute__((vector_size(32)));
// Returns a plus b, where b (a 32-byte vector of unsigned __int128) is first
// cast to v32u64. The FileCheck directives inside the body pin the IR
// function signature expected under each check prefix: either both
// parameters passed byval through a pointer with a given alignment, or the
// first parameter passed directly as <4 x i64>.
// NOTE(review): one check prefix appears on two directives that disagree on
// how the second parameter is passed; this looks like the old and new
// expectation shown side by side -- confirm against the real test file.
v32u64 test_v32u128(v32u64 a, v32u128 b) {
// MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}})
// MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
// CLANG9ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
return a + (v32u64)b;
}
@ -43,7 +46,7 @@ v64u64 test_v64u128(v64u64 a, v64u128 b) {
// MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}})
// MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}})
// MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
// CLANG9ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
return a + (v64u64)b;
}