[AArch64] Implement Vector Function ABI name mangling.
Summary: The name mangling scheme is defined in section 3.5 of the "Vector function application binary interface specification for AArch64" [1].

[1] https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi

Reviewers: rengolin, ABataev

Reviewed By: ABataev

Subscribers: sdesmalen, javed.absar, kristof.beyls, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60583

llvm-svn: 358490
commit a0a2264ef7 (parent aa18ae862d)
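For reference, the names produced by this patch follow the AAVFABI pattern _ZGV&lt;ISA&gt;&lt;mask&gt;&lt;VLEN&gt;&lt;parameter tokens&gt;_&lt;scalar name&gt;. The snippet below is only an illustration of how one such name decodes; it is taken from the sincos case in the tests added by this change, not an additional part of the patch:

#pragma omp declare simd linear(sin) linear(cos)
void sincos(double in, double *sin, double *cos);
// Expected Advanced SIMD variant: "_ZGVnN2vll_sincos"
//   'n' = Advanced SIMD ISA, 'N' = unmasked, 2 = number of lanes,
//   'v' = vector parameter (in), 'l' 'l' = linear parameters (sin, cos).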
@@ -9648,6 +9648,307 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for references marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
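As a worked instance of the SVE simdlen check above (UserVLEN * WDS must be a multiple of 128 bits and at most 2048 bits), the numbers below are illustrative only and are taken from the SVE tests added later in this patch:

#pragma omp declare simd simdlen(32) // 32 * 64 = 2048 bits: accepted, yields "_ZGVsM32v_foo"
#pragma omp declare simd simdlen(66) // 66 * 64 = 4224 bits > 2048: warning, no attribute emitted
double foo(float x);                 // WDS = 64, from the double return value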
@@ -9734,12 +10035,26 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
         ++MI;
       }
       llvm::APSInt VLENVal;
-      if (const Expr *VLEN = Attr->getSimdlen())
-        VLENVal = VLEN->EvaluateKnownConstInt(C);
+      SourceLocation ExprLoc;
+      const Expr *VLENExpr = Attr->getSimdlen();
+      if (VLENExpr) {
+        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
+        ExprLoc = VLENExpr->getExprLoc();
+      }
       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
-          CGM.getTriple().getArch() == llvm::Triple::x86_64)
+          CGM.getTriple().getArch() == llvm::Triple::x86_64) {
         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
+      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
+        unsigned VLEN = VLENVal.getExtValue();
+        StringRef MangledName = Fn->getName();
+        if (CGM.getTarget().hasFeature("sve"))
+          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
+                                         MangledName, 's', 128, Fn, ExprLoc);
+        if (CGM.getTarget().hasFeature("neon"))
+          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
+                                         MangledName, 'n', 128, Fn, ExprLoc);
+      }
     }
     FD = FD->getPreviousDecl();
   }
@@ -0,0 +1,8 @@
#ifndef LLVM_CLANG_TEST_OPENMP_INPUTS_DECLARE_SIMD_FIX_H
#define LLVM_CLANG_TEST_OPENMP_INPUTS_DECLARE_SIMD_FIX_H

#pragma omp declare simd
float foo(float a, float b, int c);
float bar(float a, float b, int c);

#endif
@@ -0,0 +1,190 @@
// -fopenmp and -fopenmp-simd behavior are expected to be the same.

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64

#pragma omp declare simd
#pragma omp declare simd simdlen(2)
#pragma omp declare simd simdlen(6)
#pragma omp declare simd simdlen(8)
double foo(float x);

// AARCH64: "_ZGVnM2v_foo" "_ZGVnM4v_foo" "_ZGVnM8v_foo" "_ZGVnN2v_foo" "_ZGVnN4v_foo" "_ZGVnN8v_foo"
// AARCH64-NOT: _ZGVnN6v_foo

void foo_loop(double *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = foo(y[i]);
  }
}

// Make sure that the following two functions are generated with 4 and 2
// lanes by default, as described in the vector ABI.
#pragma omp declare simd notinbranch
float bar(double x);
#pragma omp declare simd notinbranch
double baz(float x);

// AARCH64: "_ZGVnN2v_baz" "_ZGVnN4v_baz"
// AARCH64-NOT: baz
// AARCH64: "_ZGVnN2v_bar" "_ZGVnN4v_bar"
// AARCH64-NOT: bar

void baz_bar_loop(double *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = baz(y[i]);
    y[i] = bar(x[i]);
  }
}

/***************************/
/* 32-bit integer tests    */
/***************************/

#pragma omp declare simd
#pragma omp declare simd simdlen(2)
#pragma omp declare simd simdlen(6)
#pragma omp declare simd simdlen(8)
long foo_int(int x);

// AARCH64: "_ZGVnN2v_foo_int" "_ZGVnN4v_foo_int" "_ZGVnN8v_foo_int"
// No non power of two
// AARCH64-NOT: _ZGVnN6v_foo_int

void foo_int_loop(long *x, int *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = foo_int(y[i]);
  }
}

#pragma omp declare simd
char simple_8bit(char);
// AARCH64: "_ZGVnM16v_simple_8bit" "_ZGVnM8v_simple_8bit" "_ZGVnN16v_simple_8bit" "_ZGVnN8v_simple_8bit"
#pragma omp declare simd
short simple_16bit(short);
// AARCH64: "_ZGVnM4v_simple_16bit" "_ZGVnM8v_simple_16bit" "_ZGVnN4v_simple_16bit" "_ZGVnN8v_simple_16bit"
#pragma omp declare simd
int simple_32bit(int);
// AARCH64: "_ZGVnM2v_simple_32bit" "_ZGVnM4v_simple_32bit" "_ZGVnN2v_simple_32bit" "_ZGVnN4v_simple_32bit"
#pragma omp declare simd
long simple_64bit(long);
// AARCH64: "_ZGVnM2v_simple_64bit" "_ZGVnN2v_simple_64bit"

#pragma omp declare simd
#pragma omp declare simd simdlen(32)
char a01(int x);
// AARCH64: "_ZGVnN16v_a01" "_ZGVnN32v_a01" "_ZGVnN8v_a01"
// AARCH64-NOT: a01

#pragma omp declare simd
#pragma omp declare simd simdlen(2)
long a02(short x);
// AARCH64: "_ZGVnN2v_a02" "_ZGVnN4v_a02" "_ZGVnN8v_a02"
// AARCH64-NOT: a02

/************/
/* pointers */
/************/

#pragma omp declare simd
int b01(int *x);
// AARCH64: "_ZGVnN4v_b01"
// AARCH64-NOT: b01

#pragma omp declare simd
char b02(char *);
// AARCH64: "_ZGVnN16v_b02" "_ZGVnN8v_b02"
// AARCH64-NOT: b02

#pragma omp declare simd
double *b03(double *);
// AARCH64: "_ZGVnN2v_b03"
// AARCH64-NOT: b03

/***********/
/* masking */
/***********/

#pragma omp declare simd inbranch
int c01(double *x, short y);
// AARCH64: "_ZGVnM8vv_c01"
// AARCH64-NOT: c01

#pragma omp declare simd inbranch uniform(x)
double c02(double *x, char y);
// AARCH64: "_ZGVnM16uv_c02" "_ZGVnM8uv_c02"
// AARCH64-NOT: c02

/*************************/
/* sincos-like signature */
/*************************/
#pragma omp declare simd linear(sin) linear(cos)
void sincos(double in, double *sin, double *cos);
// AARCH64: "_ZGVnN2vll_sincos"
// AARCH64-NOT: sincos

#pragma omp declare simd linear(sin : 1) linear(cos : 2)
void SinCos(double in, double *sin, double *cos);
// AARCH64: "_ZGVnN2vll2_SinCos"
// AARCH64-NOT: SinCos

// Selection of tests based on the examples provided in chapter 5 of
// the Vector Function ABI specifications for AArch64, at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

// Listing 2, p. 18
#pragma omp declare simd inbranch uniform(x) linear(val(i) : 4)
int foo2(int *x, int i);
// AARCH64: "_ZGVnM2ul4_foo2" "_ZGVnM4ul4_foo2"
// AARCH64-NOT: foo2

// Listing 3, p. 18
#pragma omp declare simd inbranch uniform(x, c) linear(i : c)
int foo3(int *x, int i, unsigned char c);
// AARCH64: "_ZGVnM16uls2u_foo3" "_ZGVnM8uls2u_foo3"
// AARCH64-NOT: foo3

// Listing 6, p. 19
#pragma omp declare simd linear(x) aligned(x : 16) simdlen(4)
int foo4(int *x, float y);
// AARCH64: "_ZGVnM4la16v_foo4" "_ZGVnN4la16v_foo4"
// AARCH64-NOT: foo4

static int *I;
static char *C;
static short *S;
static long *L;
static float *F;
static double *D;
void do_something() {
  simple_8bit(*C);
  simple_16bit(*S);
  simple_32bit(*I);
  simple_64bit(*L);
  *C = a01(*I);
  *L = a02(*S);
  *I = b01(I);
  *C = b02(C);
  D = b03(D);
  *I = c01(D, *S);
  *D = c02(D, *S);
  sincos(*D, D, D);
  SinCos(*D, D, D);
  foo2(I, *I);
  foo3(I, *I, *C);
  foo4(I, *F);
}

typedef struct S {
  char R, G, B;
} STy;
#pragma omp declare simd notinbranch
STy DoRGB(STy x);
// AARCH64: "_ZGVnN2v_DoRGB"

static STy *RGBData;

void do_rgb_stuff() {
  DoRGB(*RGBData);
}
@@ -0,0 +1,37 @@
// -fopenmp and -fopenmp-simd behavior are expected to be the same.

// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=ADVSIMD
// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +sve -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=SVE

// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=ADVSIMD
// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +sve -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=SVE

// expected-no-diagnostics

#pragma omp declare simd
double f(double x);

#pragma omp declare simd
float f(float x);

void aaa(double *x, double *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = f(y[i]);
  }
}

void aaa(float *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = f(y[i]);
  }
}

// ADVSIMD: "_ZGVnN2v__Z1fd"
// ADVSIMD-NOT: _Z1fd
// ADVSIMD: "_ZGVnN4v__Z1ff"
// ADVSIMD-NOT: _Z1ff

// SVE: "_ZGVsMxv__Z1fd"
// SVE-NOT: _Z1fd
// SVE: "_ZGVsMxv__Z1ff"
// SVE-NOT: _Z1ff
@@ -0,0 +1,26 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -std=c11 -emit-llvm %s -o - -femit-all-decls | FileCheck %s

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp -x c -std=c11 -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=SVE

#pragma omp declare simd
#pragma omp declare simd simdlen(4) notinbranch
double _Complex double_complex(double _Complex);
// CHECK: "_ZGVnM2v_double_complex" "_ZGVnN2v_double_complex" "_ZGVnN4v_double_complex"
// CHECK-NOT: double_complex
// SVE: "_ZGVsM4v_double_complex" "_ZGVsMxv_double_complex"
// SVE-NOT: double_complex

#pragma omp declare simd
#pragma omp declare simd simdlen(8) notinbranch
float _Complex float_complex(float _Complex);
// CHECK: "_ZGVnM2v_float_complex" "_ZGVnN2v_float_complex" "_ZGVnN8v_float_complex"
// CHECK-NOT: float_complex
// SVE: "_ZGVsM8v_float_complex" "_ZGVsMxv_float_complex"
// SVE-NOT: float_complex

static double _Complex *DC;
static float _Complex *DF;
void call_the_complex_functions() {
  double_complex(*DC);
  float_complex(*DF);
}
@@ -0,0 +1,37 @@
// This test makes sure that no crash happens.

// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \
// RUN: -fopenmp -O3 -march=armv8-a -c %s | FileCheck %s

// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \
// RUN: -fopenmp-simd -O3 -march=armv8-a -c %s | FileCheck %s

// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \
// RUN: -fopenmp -O3 -march=armv8-a+sve -c %s | FileCheck %s

// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \
// RUN: -fopenmp-simd -O3 -march=armv8-a+sve -c %s | FileCheck %s

// loop in the user code, in user_code.c
#include "Inputs/declare-simd-fix.h"

// CHECK-LABEL: do_something:
void do_something(int *a, double *b, unsigned N) {
  for (unsigned i = 0; i < N; ++i) {
    a[i] = foo(b[0], b[0], 1);
  }
}

// CHECK-LABEL: do_something_else:
void do_something_else(int *a, double *b, unsigned N) {
  for (unsigned i = 0; i < N; ++i) {
    a[i] = foo(1.1, 1.2, 1);
  }
}

// CHECK-LABEL: do_something_more:
void do_something_more(int *a, double *b, unsigned N) {
  for (unsigned i = 0; i < N; ++i) {
    a[i] = foo(b[i], b[i], a[1]);
  }
}
@@ -0,0 +1,43 @@
// -fopenmp and -fopenmp-simd behavior are expected to be the same.

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve \
// RUN: -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve \
// RUN: -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s

#pragma omp declare simd
#pragma omp declare simd notinbranch
#pragma omp declare simd simdlen(2)
#pragma omp declare simd simdlen(4)
#pragma omp declare simd simdlen(5) // not a multiple of 128-bits
#pragma omp declare simd simdlen(6)
#pragma omp declare simd simdlen(8)
#pragma omp declare simd simdlen(32)
#pragma omp declare simd simdlen(34) // requires more than 2048 bits
double foo(float x);

// CHECK-DAG: "_ZGVsM2v_foo" "_ZGVsM32v_foo" "_ZGVsM4v_foo" "_ZGVsM6v_foo" "_ZGVsM8v_foo" "_ZGVsMxv_foo"
// CHECK-NOT: _ZGVsN
// CHECK-NOT: _ZGVsM5v_foo
// CHECK-NOT: _ZGVsM34v_foo
// CHECK-NOT: foo

void foo_loop(double *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = foo(y[i]);
  }
}

// test integers

#pragma omp declare simd notinbranch
char a01(int x);
// CHECK-DAG: _ZGVsMxv_a01
// CHECK-NOT: a01

static int *in;
static char *out;
void do_something() {
  *out = a01(*in);
}
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp %s -S -o %t -verify
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd %s -S -o %t -verify

#pragma omp declare simd simdlen(6)
double foo(float x);
// expected-warning@-2{{The value specified in simdlen must be a power of 2 when targeting Advanced SIMD.}}
#pragma omp declare simd simdlen(1)
float bar(double x);
// expected-warning@-2{{The clause simdlen(1) has no effect when targeting aarch64.}}

void foo_loop(double *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = foo(y[i]);
    y[i] = bar(x[i]);
  }
}
@@ -0,0 +1,12 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp %s -S -o %t -verify
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp-simd %s -S -o %t -verify

#pragma omp declare simd simdlen(66)
double foo(float x);
// expected-warning@-2{{The clause simdlen must fit the 64-bit lanes in the architectural constraints for SVE (min is 128-bit, max is 2048-bit, by steps of 128-bit)}}

void foo_loop(double *x, float *y, int N) {
  for (int i = 0; i < N; ++i) {
    x[i] = foo(y[i]);
  }
}