[PowerPC] ABI support for the QPX vector instruction set

Support for the QPX vector instruction set, used on the IBM BG/Q supercomputer,
has recently been added to the LLVM PowerPC backend. This vector instruction
set requires some ABI modifications because the ABI on the BG/Q expects
<4 x double> vectors to be provided with 32-byte stack alignment, and to be
handled as native vector types (similar to how Altivec vectors are handled on
mainline PPC systems). I've named this ABI variant elfv1-qpx, have made this
the default ABI when QPX is supported, and have updated the ABI handling code
to provide QPX vectors with the correct stack alignment and associated
register-assignment logic.

llvm-svn: 231960
This commit is contained in:
Hal Finkel 2015-03-11 19:14:15 +00:00
parent 8cda34f5e7
commit 0d0a1a53e3
9 changed files with 159 additions and 29 deletions

View File

@ -742,6 +742,7 @@ class PPCTargetInfo : public TargetInfo {
bool HasVSX;
bool HasP8Vector;
bool HasP8Crypto;
bool HasQPX;
protected:
std::string ABI;
@ -749,7 +750,7 @@ protected:
public:
PPCTargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), HasVSX(false), HasP8Vector(false),
HasP8Crypto(false) {
HasP8Crypto(false), HasQPX(false) {
BigEndian = (Triple.getArch() != llvm::Triple::ppc64le);
LongDoubleWidth = LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble;
@ -1015,6 +1016,11 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
continue;
}
if (Feature == "qpx") {
HasQPX = true;
continue;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
}
@ -1049,7 +1055,7 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
}
// ABI options.
if (ABI == "elfv1")
if (ABI == "elfv1" || ABI == "elfv1-qpx")
Builder.defineMacro("_CALL_ELF", "1");
if (ABI == "elfv2")
Builder.defineMacro("_CALL_ELF", "2");
@ -1223,6 +1229,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("vsx", HasVSX)
.Case("power8-vector", HasP8Vector)
.Case("crypto", HasP8Crypto)
.Case("qpx", HasQPX)
.Default(false);
}
@ -1403,7 +1410,7 @@ public:
}
// PPC64 Linux-specifc ABI options.
bool setABI(const std::string &Name) override {
if (Name == "elfv1" || Name == "elfv2") {
if (Name == "elfv1" || Name == "elfv1-qpx" || Name == "elfv2") {
ABI = Name;
return true;
}

View File

@ -3280,13 +3280,42 @@ public:
private:
static const unsigned GPRBits = 64;
ABIKind Kind;
bool HasQPX;
// A vector of float or double will be promoted to <4 x f32> or <4 x f64> and
// will be passed in a QPX register.
bool IsQPXVectorTy(const Type *Ty) const {
if (!HasQPX)
return false;
if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned NumElements = VT->getNumElements();
if (NumElements == 1)
return false;
if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) {
if (getContext().getTypeSize(Ty) <= 256)
return true;
} else if (VT->getElementType()->
isSpecificBuiltinType(BuiltinType::Float)) {
if (getContext().getTypeSize(Ty) <= 128)
return true;
}
}
return false;
}
bool IsQPXVectorTy(QualType Ty) const {
return IsQPXVectorTy(Ty.getTypePtr());
}
public:
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
: DefaultABIInfo(CGT), Kind(Kind) {}
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX)
: DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
bool isPromotableTypeForABI(QualType Ty) const;
bool isAlignedParamType(QualType Ty) const;
bool isAlignedParamType(QualType Ty, bool &Align32) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
@ -3311,7 +3340,8 @@ public:
const Type *T = isSingleElementStruct(I.type, getContext());
if (T) {
const BuiltinType *BT = T->getAs<BuiltinType>();
if ((T->isVectorType() && getContext().getTypeSize(T) == 128) ||
if (IsQPXVectorTy(T) ||
(T->isVectorType() && getContext().getTypeSize(T) == 128) ||
(BT && BT->isFloatingPoint())) {
QualType QT(T, 0);
I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
@ -3327,10 +3357,13 @@ public:
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
bool HasQPX;
public:
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
PPC64_SVR4_ABIInfo::ABIKind Kind)
: TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind)) {}
PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX)
: TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)),
HasQPX(HasQPX) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@ -3340,7 +3373,12 @@ public:
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
unsigned getOpenMPSimdDefaultAlignment(QualType QT) const override {
if (HasQPX)
if (const PointerType *PT = QT->getAs<PointerType>())
if (PT->getPointeeType()->isSpecificBuiltinType(BuiltinType::Double))
return 32; // Natural alignment for QPX doubles.
return 16; // Natural alignment for Altivec and VSX vectors.
}
@ -3401,15 +3439,23 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
/// isAlignedParamType - Determine whether a type requires 16-byte
/// alignment in the parameter area.
bool
PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty, bool &Align32) const {
Align32 = false;
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
// Only vector types of size 16 bytes need alignment (larger types are
// passed via reference, smaller types are not aligned).
if (Ty->isVectorType())
if (IsQPXVectorTy(Ty)) {
if (getContext().getTypeSize(Ty) > 128)
Align32 = true;
return true;
} else if (Ty->isVectorType()) {
return getContext().getTypeSize(Ty) == 128;
}
// For single-element float/vector structs, we consider the whole type
// to have the same alignment requirements as its single element.
@ -3417,7 +3463,7 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
const Type *EltType = isSingleElementStruct(Ty, getContext());
if (EltType) {
const BuiltinType *BT = EltType->getAs<BuiltinType>();
if ((EltType->isVectorType() &&
if (IsQPXVectorTy(EltType) || (EltType->isVectorType() &&
getContext().getTypeSize(EltType) == 128) ||
(BT && BT->isFloatingPoint()))
AlignAsType = EltType;
@ -3431,13 +3477,22 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
AlignAsType = Base;
// With special case aggregates, only vector base types need alignment.
if (AlignAsType)
if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
if (getContext().getTypeSize(AlignAsType) > 128)
Align32 = true;
return true;
} else if (AlignAsType) {
return AlignAsType->isVectorType();
}
// Otherwise, we only need alignment for any aggregate type that
// has an alignment requirement of >= 16 bytes.
if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128)
if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
Align32 = true;
return true;
}
return false;
}
@ -3543,7 +3598,7 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return true;
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
if (getContext().getTypeSize(VT) == 128)
if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty))
return true;
}
return false;
@ -3569,7 +3624,7 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
// Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (Ty->isVectorType()) {
if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
uint64_t Size = getContext().getTypeSize(Ty);
if (Size > 128)
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
@ -3583,7 +3638,9 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
bool Align32;
uint64_t ABIAlign = isAlignedParamType(Ty, Align32) ?
(Align32 ? 32 : 16) : 8;
uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
// ELFv2 homogeneous aggregates are passed as array types.
@ -3640,7 +3697,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
// Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (RetTy->isVectorType()) {
if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size > 128)
return ABIArgInfo::getIndirect(0);
@ -3697,10 +3754,13 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr,
llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
// Handle types that require 16-byte alignment in the parameter save area.
if (isAlignedParamType(Ty)) {
bool Align32;
if (isAlignedParamType(Ty, Align32)) {
llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(15));
AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt64(-16));
AddrAsInt = Builder.CreateAdd(AddrAsInt,
Builder.getInt64(Align32 ? 31 : 15));
AddrAsInt = Builder.CreateAnd(AddrAsInt,
Builder.getInt64(Align32 ? -32 : -16));
Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
}
@ -6978,19 +7038,21 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
if (getTarget().getABI() == "elfv2")
Kind = PPC64_SVR4_ABIInfo::ELFv2;
bool HasQPX = getTarget().getABI() == "elfv1-qpx";
return *(TheTargetCodeGenInfo =
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
} else
return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types));
case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
if (getTarget().getABI() == "elfv1")
if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx")
Kind = PPC64_SVR4_ABIInfo::ELFv1;
bool HasQPX = getTarget().getABI() == "elfv1-qpx";
return *(TheTargetCodeGenInfo =
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
}
case llvm::Triple::nvptx:

View File

@ -1316,9 +1316,22 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
ABIName = A->getValue();
} else if (getToolChain().getTriple().isOSLinux())
switch(getToolChain().getArch()) {
case llvm::Triple::ppc64:
case llvm::Triple::ppc64: {
// When targeting a processor that supports QPX, or if QPX is
// specifically enabled, default to using the ABI that supports QPX (so
// long as it is not specifically disabled).
bool HasQPX = false;
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
HasQPX = A->getValue() == StringRef("a2q");
HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX);
if (HasQPX) {
ABIName = "elfv1-qpx";
break;
}
ABIName = "elfv1";
break;
}
case llvm::Triple::ppc64le:
ABIName = "elfv2";
break;

View File

@ -7,6 +7,8 @@
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
// RUN: -target-abi elfv1 | FileCheck %s --check-prefix=CHECK-ELFv1
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
// RUN: -target-abi elfv1-qpx | FileCheck %s --check-prefix=CHECK-ELFv1
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
// RUN: -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ELFv2
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s \
// RUN: | FileCheck %s --check-prefix=CHECK-ELFv2
@ -27,6 +29,8 @@ struct fab func_fab(struct fab x) { return x; }
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
// RUN: -target-abi elfv1 | FileCheck %s --check-prefix=CHECK-ASM-ELFv1
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
// RUN: -target-abi elfv1-qpx | FileCheck %s --check-prefix=CHECK-ASM-ELFv1
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
// RUN: -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ASM-ELFv2
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -S -o - %s \
// RUN: | FileCheck %s --check-prefix=CHECK-ASM-ELFv2

View File

@ -0,0 +1,29 @@
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s -check-prefix=ALL -check-prefix=NORMAL
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - -target-abi elfv1-qpx %s | FileCheck %s -check-prefix=ALL -check-prefix=QPX
typedef float v4sf __attribute__((vector_size(16)));
typedef double v4df __attribute__((vector_size(32)));
struct ssf { v4sf v; };
struct sdf { v4df v; };
struct ssf2 { v4sf v[2]; };
struct sdf2 { v4df v[2]; };
v4sf foo1(struct ssf a, v4sf b, struct ssf2 c) {
return a.v + b;
}
// ALL-LABEL: define <4 x float> @foo1(<4 x float> inreg %a.coerce, <4 x float> %b, [2 x i128] %c.coerce)
// ALL: ret <4 x float>
v4df foo2(struct sdf a, v4df b, struct sdf2 c) {
return a.v + b;
}
// QPX-LABEL: define <4 x double> @foo2(<4 x double> inreg %a.coerce, <4 x double> %b, [2 x i256] %c.coerce)
// QPX: ret <4 x double>
// NORMAL-LABEL: define void @foo2(<4 x double>* noalias sret %agg.result, [2 x i128] %a.coerce, <4 x double>*, [4 x i128] %c.coerce)
// NORMAL: ret void

View File

@ -634,6 +634,9 @@
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=powerpc64-linux-gnu -mabi=elfv1 \
// RUN: | FileCheck --check-prefix=CHECK-PPC64-ELFv1 %s
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=powerpc64-linux-gnu -mabi=elfv1-qpx \
// RUN: | FileCheck --check-prefix=CHECK-PPC64-ELFv1 %s
// CHECK-PPC64-ELFv1: "{{.*}}ld{{(.exe)?}}"
// CHECK-PPC64-ELFv1: "-m" "elf64ppc"
// CHECK-PPC64-ELFv1: "-dynamic-linker" "{{.*}}/lib64/ld64.so.1"

View File

@ -5,6 +5,14 @@
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1 %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1 %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2 %s
// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
@ -15,5 +23,6 @@
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2 %s
// CHECK-ELFv1: "-target-abi" "elfv1"
// CHECK-ELFv1-QPX: "-target-abi" "elfv1-qpx"
// CHECK-ELFv2: "-target-abi" "elfv2"

View File

@ -1,5 +1,6 @@
// RUN: %clang_cc1 -fopenmp=libiomp5 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
// RUN: %clang_cc1 -fopenmp=libiomp5 -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
// RUN: %clang_cc1 -fopenmp=libiomp5 -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=QPX
void h1(float *c, float *a, double b[], int size)
{
@ -15,7 +16,8 @@ void h1(float *c, float *a, double b[], int size)
// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// NORMAL-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
for (int i = 0; i < size; ++i) {

View File

@ -6004,6 +6004,7 @@
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -check-prefix PPC64-ELFv1 %s