forked from OSchip/llvm-project
Allow __fp16 as a function arg or return type for AArch64
ACLE 2.0 allows __fp16 to be used as a function argument or return type. This commit enables that for AArch64. It also fixes an existing bug that caused clang to reject homogeneous floating-point aggregates with a base type of __fp16; such aggregates are valid for AAPCS64, but not for AAPCS-VFP. llvm-svn: 216558
This commit is contained in:
parent
6107a8f4db
commit
ed8ecc8429
|
@ -128,6 +128,7 @@ LANGOPT(ShortEnums , 1, 0, "short enum types")
|
|||
LANGOPT(OpenCL , 1, 0, "OpenCL")
|
||||
LANGOPT(OpenCLVersion , 32, 0, "OpenCL version")
|
||||
LANGOPT(NativeHalfType , 1, 0, "Native half type support")
|
||||
LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
|
||||
LANGOPT(CUDA , 1, 0, "CUDA")
|
||||
LANGOPT(OpenMP , 1, 0, "OpenMP support")
|
||||
|
||||
|
|
|
@ -511,6 +511,8 @@ def vtordisp_mode_EQ : Joined<["-"], "vtordisp-mode=">,
|
|||
HelpText<"Control vtordisp placement on win32 targets">;
|
||||
def fno_rtti_data : Flag<["-"], "fno-rtti-data">,
|
||||
HelpText<"Control emission of RTTI data">;
|
||||
def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">,
|
||||
HelpText<"Allow function arguments and returns of type half">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Header Search Options
|
||||
|
|
|
@ -1132,7 +1132,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
|
|||
case APValue::Float: {
|
||||
const llvm::APFloat &Init = Value.getFloat();
|
||||
if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf &&
|
||||
!Context.getLangOpts().NativeHalfType)
|
||||
!Context.getLangOpts().NativeHalfType &&
|
||||
!Context.getLangOpts().HalfArgsAndReturns)
|
||||
return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt());
|
||||
else
|
||||
return llvm::ConstantFP::get(VMContext, Init);
|
||||
|
|
|
@ -701,7 +701,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
|
|||
llvm::Type *SrcTy = Src->getType();
|
||||
|
||||
// If casting to/from storage-only half FP, use special intrinsics.
|
||||
if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
|
||||
if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
|
||||
!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
|
||||
Src = Builder.CreateCall(
|
||||
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
|
||||
CGF.CGM.FloatTy),
|
||||
|
@ -773,7 +774,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
|
|||
DstTy);
|
||||
|
||||
// Cast to half via float
|
||||
if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
|
||||
if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
|
||||
!CGF.getContext().getLangOpts().HalfArgsAndReturns)
|
||||
DstTy = CGF.FloatTy;
|
||||
|
||||
if (isa<llvm::IntegerType>(SrcTy)) {
|
||||
|
@ -1691,7 +1693,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
|
|||
// Add the inc/dec to the real part.
|
||||
llvm::Value *amt;
|
||||
|
||||
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
|
||||
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
|
||||
!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
|
||||
// Another special case: half FP increment should be done via float
|
||||
value = Builder.CreateCall(
|
||||
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
|
||||
|
@ -1714,7 +1717,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
|
|||
}
|
||||
value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
|
||||
|
||||
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
|
||||
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
|
||||
!CGF.getContext().getLangOpts().HalfArgsAndReturns)
|
||||
value = Builder.CreateCall(
|
||||
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
|
||||
CGF.CGM.FloatTy),
|
||||
|
|
|
@ -358,9 +358,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
|
|||
|
||||
case BuiltinType::Half:
|
||||
// Half FP can either be storage-only (lowered to i16) or native.
|
||||
ResultType = getTypeForFormat(getLLVMContext(),
|
||||
Context.getFloatTypeSemantics(T),
|
||||
Context.getLangOpts().NativeHalfType);
|
||||
ResultType =
|
||||
getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
|
||||
Context.getLangOpts().NativeHalfType ||
|
||||
Context.getLangOpts().HalfArgsAndReturns);
|
||||
break;
|
||||
case BuiltinType::Float:
|
||||
case BuiltinType::Double:
|
||||
|
|
|
@ -3544,8 +3544,9 @@ public:
|
|||
};
|
||||
}
|
||||
|
||||
static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
|
||||
static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
|
||||
ASTContext &Context,
|
||||
bool isAArch64,
|
||||
uint64_t *HAMembers = nullptr);
|
||||
|
||||
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
|
||||
|
@ -3627,7 +3628,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
|
|||
// Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
|
||||
const Type *Base = nullptr;
|
||||
uint64_t Members = 0;
|
||||
if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
|
||||
if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) {
|
||||
IsHA = true;
|
||||
if (!IsNamedArg && isDarwinPCS()) {
|
||||
// With the Darwin ABI, variadic arguments are always passed on the stack
|
||||
|
@ -3685,7 +3686,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
|
|||
return ABIArgInfo::getIgnore();
|
||||
|
||||
const Type *Base = nullptr;
|
||||
if (isHomogeneousAggregate(RetTy, Base, getContext()))
|
||||
if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true))
|
||||
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
|
||||
return ABIArgInfo::getDirect();
|
||||
|
||||
|
@ -3822,7 +3823,7 @@ static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty,
|
|||
|
||||
const Type *Base = nullptr;
|
||||
uint64_t NumMembers;
|
||||
bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers);
|
||||
bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
|
||||
if (IsHFA && NumMembers > 1) {
|
||||
// Homogeneous aggregates passed in registers will have their elements split
|
||||
// and stored 16-bytes apart regardless of size (they're notionally in qN,
|
||||
|
@ -3965,7 +3966,7 @@ llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType T
|
|||
uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
|
||||
|
||||
const Type *Base = nullptr;
|
||||
bool isHA = isHomogeneousAggregate(Ty, Base, getContext());
|
||||
bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true);
|
||||
|
||||
bool isIndirect = false;
|
||||
// Arguments bigger than 16 bytes which aren't homogeneous aggregates should
|
||||
|
@ -4251,15 +4252,16 @@ void ARMABIInfo::setRuntimeCC() {
|
|||
RuntimeCC = abiCC;
|
||||
}
|
||||
|
||||
/// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
|
||||
/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP (or, when isAArch64 is set, AAPCS64) homogeneous
|
||||
/// aggregate. If HAMembers is non-null, the number of base elements
|
||||
/// contained in the type is returned through it; this is used for the
|
||||
/// recursive calls that check aggregate component types.
|
||||
static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
|
||||
ASTContext &Context, uint64_t *HAMembers) {
|
||||
static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
|
||||
ASTContext &Context, bool isAArch64,
|
||||
uint64_t *HAMembers) {
|
||||
uint64_t Members = 0;
|
||||
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
|
||||
if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members))
|
||||
if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
|
||||
return false;
|
||||
Members *= AT->getSize().getZExtValue();
|
||||
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
|
||||
|
@ -4270,7 +4272,7 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
|
|||
Members = 0;
|
||||
for (const auto *FD : RD->fields()) {
|
||||
uint64_t FldMembers;
|
||||
if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers))
|
||||
if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
|
||||
return false;
|
||||
|
||||
Members = (RD->isUnion() ?
|
||||
|
@ -4284,12 +4286,22 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
|
|||
}
|
||||
|
||||
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
|
||||
// double, or 64-bit or 128-bit vectors.
|
||||
// double, or 64-bit or 128-bit vectors. "long double" has the same machine
|
||||
// type as double, so it is also allowed as a base type.
|
||||
// Homogeneous aggregates for AAPCS64 must have base types of a floating
|
||||
// point type or a short-vector type. This is the same as the 32-bit ABI,
|
||||
// but with the difference that any floating-point type is allowed,
|
||||
// including __fp16.
|
||||
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
|
||||
if (BT->getKind() != BuiltinType::Float &&
|
||||
BT->getKind() != BuiltinType::Double &&
|
||||
BT->getKind() != BuiltinType::LongDouble)
|
||||
return false;
|
||||
if (isAArch64) {
|
||||
if (!BT->isFloatingPoint())
|
||||
return false;
|
||||
} else {
|
||||
if (BT->getKind() != BuiltinType::Float &&
|
||||
BT->getKind() != BuiltinType::Double &&
|
||||
BT->getKind() != BuiltinType::LongDouble)
|
||||
return false;
|
||||
}
|
||||
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
|
||||
unsigned VecSize = Context.getTypeSize(VT);
|
||||
if (VecSize != 64 && VecSize != 128)
|
||||
|
@ -4491,7 +4503,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
// into VFP registers.
|
||||
const Type *Base = nullptr;
|
||||
uint64_t Members = 0;
|
||||
if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
|
||||
if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) {
|
||||
assert(Base && "Base class should be set for homogeneous aggregate");
|
||||
// Base can be a floating-point or a vector.
|
||||
if (Base->isVectorType()) {
|
||||
|
@ -4696,7 +4708,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
|
|||
// Check for homogeneous aggregates with AAPCS-VFP.
|
||||
if (getABIKind() == AAPCS_VFP && !isVariadic) {
|
||||
const Type *Base = nullptr;
|
||||
if (isHomogeneousAggregate(RetTy, Base, getContext())) {
|
||||
if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) {
|
||||
assert(Base && "Base class should be set for homogeneous aggregate");
|
||||
// Homogeneous Aggregates are returned directly.
|
||||
return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP);
|
||||
|
|
|
@ -3714,6 +3714,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
|
||||
}
|
||||
|
||||
if (getToolChain().getTriple().getArch() == llvm::Triple::aarch64 ||
|
||||
getToolChain().getTriple().getArch() == llvm::Triple::aarch64_be)
|
||||
CmdArgs.push_back("-fallow-half-arguments-and-returns");
|
||||
|
||||
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
|
||||
options::OPT_mno_restrict_it)) {
|
||||
if (A->getOption().matches(options::OPT_mrestrict_it)) {
|
||||
|
|
|
@ -1500,6 +1500,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name);
|
||||
Opts.ImplementationOfModule =
|
||||
Args.getLastArgValue(OPT_fmodule_implementation_of);
|
||||
Opts.NativeHalfType = Opts.NativeHalfType;
|
||||
Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns);
|
||||
|
||||
if (!Opts.CurrentModule.empty() && !Opts.ImplementationOfModule.empty() &&
|
||||
Opts.CurrentModule != Opts.ImplementationOfModule) {
|
||||
|
|
|
@ -1746,7 +1746,7 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
|
|||
}
|
||||
|
||||
// Functions cannot return half FP.
|
||||
if (T->isHalfType()) {
|
||||
if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
|
||||
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 <<
|
||||
FixItHint::CreateInsertion(Loc, "*");
|
||||
return true;
|
||||
|
@ -1776,7 +1776,7 @@ QualType Sema::BuildFunctionType(QualType T,
|
|||
if (ParamType->isVoidType()) {
|
||||
Diag(Loc, diag::err_param_with_void_type);
|
||||
Invalid = true;
|
||||
} else if (ParamType->isHalfType()) {
|
||||
} else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
|
||||
// Disallow half FP arguments.
|
||||
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 <<
|
||||
FixItHint::CreateInsertion(Loc, "*");
|
||||
|
@ -2751,7 +2751,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
|
|||
S.Diag(D.getIdentifierLoc(), diag::err_opencl_half_return) << T;
|
||||
D.setInvalidType(true);
|
||||
}
|
||||
} else {
|
||||
} else if (!S.getLangOpts().HalfArgsAndReturns) {
|
||||
S.Diag(D.getIdentifierLoc(),
|
||||
diag::err_parameters_retval_cannot_have_fp16_type) << 1;
|
||||
D.setInvalidType(true);
|
||||
|
@ -2941,7 +2941,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
|
|||
D.setInvalidType();
|
||||
Param->setInvalidDecl();
|
||||
}
|
||||
} else {
|
||||
} else if (!S.getLangOpts().HalfArgsAndReturns) {
|
||||
S.Diag(Param->getLocation(),
|
||||
diag::err_parameters_retval_cannot_have_fp16_type) << 0;
|
||||
D.setInvalidType();
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -fallow-half-arguments-and-returns -emit-llvm -w -o - %s | FileCheck %s
|
||||
|
||||
// AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN
|
||||
// is rounded up to the next even number.
|
||||
|
@ -40,3 +40,12 @@ void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
|
|||
// CHECK: define i8 @test5(i8 %a, i16 %b)
|
||||
unsigned char test5(unsigned char a, signed short b) {
|
||||
}
|
||||
|
||||
// __fp16 can be used as a function argument or return type (ACLE 2.0)
|
||||
// CHECK: define half @test_half(half %{{.*}})
|
||||
__fp16 test_half(__fp16 A) { }
|
||||
|
||||
// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
|
||||
// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
|
||||
struct HFA_half { __fp16 a[4]; };
|
||||
struct HFA_half test_half_hfa(struct HFA_half A) { }
|
||||
|
|
Loading…
Reference in New Issue