forked from OSchip/llvm-project

[AArch64] Add ARMv8.2-A FP16 scalar intrinsics

https://reviews.llvm.org/D41792
llvm-svn: 323006

This commit is contained in:
parent e93c63d468
commit ce8746d178
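
As a quick illustration of what this patch enables (a sketch, assuming an
ARMv8.2-A target compiled with +fullfp16; the helper below is not part of the
commit):

#include <arm_fp16.h>

/* Round to the nearest integral value (ties to even), then take the
   absolute value, entirely in half precision. */
float16_t round_abs(float16_t x) {
  return vabsh_f16(vrndnh_f16(x));
}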
@@ -16,6 +16,7 @@

#define GET_NEON_BUILTINS
#include "clang/Basic/arm_neon.inc"
#include "clang/Basic/arm_fp16.inc"
#undef GET_NEON_BUILTINS

#undef BUILTIN
@@ -46,3 +46,7 @@ clang_tablegen(arm_neon.inc -gen-arm-neon-sema
  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
  SOURCE arm_neon.td
  TARGET ClangARMNeon)
clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
  SOURCE arm_fp16.td
  TARGET ClangARMFP16)
@@ -0,0 +1,131 @@
//===--- arm_fp16.td - ARM FP16 compiler interface ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TableGen definitions from which the ARM FP16 header
// file will be generated.
//
//===----------------------------------------------------------------------===//

include "arm_neon_incl.td"

// ARMv8.2-A FP16 intrinsics.
let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {

// Negate
def VNEGSH : SInst<"vneg", "ss", "Sh">;

// Reciprocal/Sqrt
def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">;
def FSQRTSH : SInst<"vsqrt", "ss", "Sh">;
def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;

// Reciprocal Estimate
def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">;

// Reciprocal Exponent
def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">;

// Reciprocal Square Root Estimate
def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;

// Rounding
def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">;
def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">;
def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">;
def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">;
def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">;
def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">;
def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">;

// Conversion
def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">;
def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">;
def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">;
def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">;
def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">;
def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">;
def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">;
def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">;
def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">;
def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">;
def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;

def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;

// Comparison
def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">;
def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">;
def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">;
def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">;
def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">;
def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">;
def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">;
def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">;
def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">;
def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">;

// Absolute Compare Mask Greater Than Or Equal
def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">;
def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">;

// Absolute Compare Mask Greater Than
def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">;
def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">;

// Scalar Absolute Value
def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">;

// Scalar Absolute Difference
def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;

// Add/Sub
def VADDSH : SInst<"vadd", "sss", "Sh">;
def VSUBHS : SInst<"vsub", "sss", "Sh">;

// Max/Min
def VMAXHS : SInst<"vmax", "sss", "Sh">;
def VMINHS : SInst<"vmin", "sss", "Sh">;
def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">;
def FMINNMHS : SInst<"vminnm", "sss", "Sh">;

// Multiplication/Division
def VMULHS : SInst<"vmul", "sss", "Sh">;
def MULXHS : SInst<"vmulx", "sss", "Sh">;
def FDIVHS : SInst<"vdiv", "sss", "Sh">;

// Vector fused multiply-add operations
def VFMAHS : SInst<"vfma", "ssss", "Sh">;
def VFMSHS : SInst<"vfms", "ssss", "Sh">;
}
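
Reading the defs above: the middle string is the prototype (one modifier for
the return value, then one per argument) and "Sh" is the typespec for the
scalar half type, so each def is mangled to an *_f16 scalar intrinsic. A
sketch of the signatures this should generate (the authoritative output is
the -gen-arm-fp16 tblgen backend; 'I', 'L', 'O', 'U' and 'Y' are scalar
result/argument modifiers not covered by the legacy table in
arm_neon_incl.td):

float16_t vnegh_f16(float16_t a);                /* "ss",  "Sh" */
float16_t vrecpsh_f16(float16_t a, float16_t b); /* "sss", "Sh" */
uint16_t  vceqzh_f16(float16_t a);               /* "bs",  "Sh" - 'b': unsigned scalar result */
int32_t   vcvth_s32_f16(float16_t a);            /* "Is",  "Sh" - 'I': 32-bit signed result */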
@@ -11,309 +11,8 @@
// file will be generated. See ARM document DUI0348B.
//
//===----------------------------------------------------------------------===//
//
// Each intrinsic is a subclass of the Inst class. An intrinsic can either
// generate a __builtin_* call or it can expand to a set of generic operations.
//
// The operations are subclasses of Operation providing a list of DAGs, the
// last of which is the return value. The available DAG nodes are documented
// below.
//
//===----------------------------------------------------------------------===//

// The base Operation class. All operations must subclass this.
class Operation<list<dag> ops=[]> {
  list<dag> Ops = ops;
  bit Unavailable = 0;
}
// An operation that only contains a single DAG.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs. The last of these will
// be the return value.
class LOp<list<dag> ops> : Operation<ops>;

// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
foreach Index = 0-63 in
  def sv##Index;
class MaskExpand;

//===----------------------------------------------------------------------===//
// Available operations
//===----------------------------------------------------------------------===//

// DAG arguments can either be operations (documented below) or variables.
// Variables are prefixed with '$'. There are variables for each input argument,
// with the name $pN, where N starts at zero. So the zero'th argument will be
// $p0, the first $p1 etc.

// op - Binary or unary operator, depending on the number of arguments. The
//      operator itself is just treated as a raw string and is not checked.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
//          (op "-", $p0) -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
//        disambiguated. If there is no intrinsic defined that takes
//        the given types (or if there is a type ambiguity) an error is
//        generated at tblgen time. The name of the intrinsic is the raw
//        name as given to the Inst class (not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
//          (assuming $p0 has type int16x8_t).
def call;
// cast - Perform a cast to a different type. This gets emitted as a static
//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
//        "bitcast".
//
//        The syntax is (cast MOD* VAL). The last argument is the value to
//        cast, preceded by a sequence of type modifiers. The target type
//        starts off as the type of VAL, and is modified by MOD in sequence.
//        The available modifiers are:
//          - $X  - Take the type of parameter/variable X. For example:
//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
//          - "R" - The type of the return type.
//          - A typedef string - A NEON or stdint.h type that is then parsed.
//                  for example: (cast "uint32x4_t", $p0).
//          - "U" - Make the type unsigned.
//          - "S" - Make the type signed.
//          - "H" - Halve the number of lanes in the type.
//          - "D" - Double the number of lanes in the type.
//          - "8" - Convert type to an equivalent vector of 8-bit signed
//                  integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
//          value is of type "int32x4_t").
//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
//          has type float64x1_t or any other vector type of 64 bits).
//          (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast", except a reinterpret-cast is produced:
//           (bitcast "T", $p0) -> "*(T*)&__p0".
//           The VAL argument is saved to a temporary so it can be used
//           as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
//       all lanes. The type of the vector is the base type of the intrinsic.
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
//          is uint32x2_t).
def dup;
// splat - Take a vector and a lane index, and return a vector of the same type
//         containing repeated instances of the source vector at the lane index.
// example: (splat $p0, $p1) ->
//          "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
//          (assuming __p0 has four elements).
def splat;
// save_temp - Create a temporary (local) variable. The variable takes a name
//             based on the zero'th parameter and can be referenced using
//             that name in subsequent DAGs in the same
//             operation. The scope of a temp is the operation. If a variable
//             with the given name already exists, an error will be given at
//             tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
//           (op "+", $var, $p1)] ->
//          "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
//                argument replaced by the second argument. Raises an error if
//                the first argument does not exist in the intrinsic name.
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
//          version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
//           string, and must be given a type. The type is a stdint.h or
//           NEON intrinsic type as given to (cast).
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
//           The MASK argument is a set of elements. The elements are generated
//           from the two special defs "mask0" and "mask1". "mask0" expands to
//           the lane indices in sequence for ARG0, and "mask1" expands to
//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
//             (shuffle $p0, $p1, mask0) -> $p0
//             (shuffle $p0, $p1, mask1) -> $p1
//
//           or, more usefully, they can be manipulated using the SetTheory
//           operators plus some extra operators defined in the NEON emitter.
//           The operators are described below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
//          A concatenation of the high halves of the input vectors.
def shuffle;

// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and take their normal definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
//       is a width in bits to reverse. The lanes this maps to are determined
//       based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;

def OP_NONE : Operation;
def OP_UNAVAILABLE : Operation {
  let Unavailable = 1;
}

//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//

// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
// a sequence of typespecs.
//
// The name is the base name of the intrinsic, for example "vget_lane". This is
// then mangled by the tblgen backend to add type information ("vget_lane_s16").
//
// A typespec is a sequence of uppercase characters (modifiers) followed by one
// lowercase character. A typespec encodes a particular "base type" of the
// intrinsic.
//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
//
// The string given to an Inst class is a sequence of typespecs. The intrinsic
// is instantiated for every typespec in the sequence. For example "sdQsQd".
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// "modifier" that can change in some way the "base type" of the intrinsic.
//
// The modifier 'd' means "default" and does not modify the base type in any
// way. The available modifiers are given below.
//
// Typespecs
// ---------
// c: char
// s: short
// i: int
// l: long
// k: 128-bit long
// f: float
// h: half-float
// d: double
//
// Typespec modifiers
// ------------------
// S: scalar, only used for function mangling.
// U: unsigned
// Q: 128b
// H: 128b without mangling 'q'
// P: polynomial
//
// Prototype modifiers
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// t: best-fit integer (int/poly args)
// x: signed integer (int/float args)
// u: unsigned integer (int/float args)
// f: float (int args)
// F: double (int args)
// H: half (int args)
// d: default
// g: default, ignore 'Q' size modifier.
// j: default, force 'Q' size modifier.
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
// l: constant uint64
// s: scalar of element type
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
// b: scalar of unsigned integer/long type (int/float args)
// $: scalar of signed integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
// 2,3,4: array of default vectors
// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type

// Every intrinsic subclasses Inst.
class Inst <string n, string p, string t, Operation o> {
  string Name = n;
  string Prototype = p;
  string Types = t;
  string ArchGuard = "";

  Operation Operation = o;
  bit CartesianProductOfTypes = 0;
  bit BigEndianSafe = 0;
  bit isShift = 0;
  bit isScalarShift = 0;
  bit isScalarNarrowShift = 0;
  bit isVCVT_N = 0;
  // For immediate checks: the immediate will be assumed to specify the lane of
  // a Q register. Only used for intrinsics which end up calling polymorphic
  // builtins.
  bit isLaneQ = 0;

  // Certain intrinsics have different names than their representative
  // instructions. This field allows us to handle this correctly when we
  // are generating tests.
  string InstName = "";

  // Certain intrinsics even though they are not a WOpInst or LOpInst,
  // generate a WOpInst/LOpInst instruction (see below for definition
  // of a WOpInst/LOpInst). For testing purposes we need to know
  // this. Ex: vset_lane which outputs vmov instructions.
  bit isHiddenWInst = 0;
  bit isHiddenLInst = 0;
}

// The following instruction classes are implemented via builtins.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}

// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests.
//
// SOpInst: Instruction with signed/unsigned suffix (e.g., "s8",
//          "u8", "p8").
// IOpInst: Instruction with generic integer suffix (e.g., "i8").
// WOpInst: Instruction with bit size only suffix (e.g., "8").
// LOpInst: Logical instruction with no bit size suffix.
// NoTestOpInst: Intrinsic that has no corresponding instruction.
class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}

//===----------------------------------------------------------------------===//
// Operations
//===----------------------------------------------------------------------===//
include "arm_neon_incl.td"

def OP_ADD : Op<(op "+", $p0, $p1)>;
def OP_ADDL : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>;
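
To make the Operation mechanism concrete: an intrinsic defined with OP_ADD
expands to inline arithmetic in the generated header rather than to a
__builtin_neon_* call. A hedged sketch of the expansion (illustrative name
and types only):

static inline int32x2_t vadd_s32_sketch(int32x2_t __p0, int32x2_t __p1) {
  return __p0 + __p1;  /* (op "+", $p0, $p1) -> "__p0 + __p1" */
}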
@@ -0,0 +1,313 @@
//===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines data structures shared by arm_neon.td and arm_fp16.td.
// It contains base operation classes, operations, instructions, instruction
// modifiers, etc.
//
//===----------------------------------------------------------------------===//
//
// Each intrinsic is a subclass of the Inst class. An intrinsic can either
// generate a __builtin_* call or it can expand to a set of generic operations.
//
// The operations are subclasses of Operation providing a list of DAGs, the
// last of which is the return value. The available DAG nodes are documented
// below.
//
//===----------------------------------------------------------------------===//

// The base Operation class. All operations must subclass this.
class Operation<list<dag> ops=[]> {
  list<dag> Ops = ops;
  bit Unavailable = 0;
}
// An operation that only contains a single DAG.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs. The last of these will
// be the return value.
class LOp<list<dag> ops> : Operation<ops>;

// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
foreach Index = 0-63 in
  def sv##Index;
class MaskExpand;

//===----------------------------------------------------------------------===//
// Available operations
//===----------------------------------------------------------------------===//

// DAG arguments can either be operations (documented below) or variables.
// Variables are prefixed with '$'. There are variables for each input argument,
// with the name $pN, where N starts at zero. So the zero'th argument will be
// $p0, the first $p1 etc.

// op - Binary or unary operator, depending on the number of arguments. The
//      operator itself is just treated as a raw string and is not checked.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
//          (op "-", $p0) -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
//        disambiguated. If there is no intrinsic defined that takes
//        the given types (or if there is a type ambiguity) an error is
//        generated at tblgen time. The name of the intrinsic is the raw
//        name as given to the Inst class (not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
//          (assuming $p0 has type int16x8_t).
def call;
// cast - Perform a cast to a different type. This gets emitted as a static
//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
//        "bitcast".
//
//        The syntax is (cast MOD* VAL). The last argument is the value to
//        cast, preceded by a sequence of type modifiers. The target type
//        starts off as the type of VAL, and is modified by MOD in sequence.
//        The available modifiers are:
//          - $X  - Take the type of parameter/variable X. For example:
//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
//          - "R" - The type of the return type.
//          - A typedef string - A NEON or stdint.h type that is then parsed.
//                  for example: (cast "uint32x4_t", $p0).
//          - "U" - Make the type unsigned.
//          - "S" - Make the type signed.
//          - "H" - Halve the number of lanes in the type.
//          - "D" - Double the number of lanes in the type.
//          - "8" - Convert type to an equivalent vector of 8-bit signed
//                  integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
//          value is of type "int32x4_t").
//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
//          has type float64x1_t or any other vector type of 64 bits).
//          (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast", except a reinterpret-cast is produced:
//           (bitcast "T", $p0) -> "*(T*)&__p0".
//           The VAL argument is saved to a temporary so it can be used
//           as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
//       all lanes. The type of the vector is the base type of the intrinsic.
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
//          is uint32x2_t).
def dup;
// splat - Take a vector and a lane index, and return a vector of the same type
//         containing repeated instances of the source vector at the lane index.
// example: (splat $p0, $p1) ->
//          "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
//          (assuming __p0 has four elements).
def splat;
// save_temp - Create a temporary (local) variable. The variable takes a name
//             based on the zero'th parameter and can be referenced using
//             that name in subsequent DAGs in the same
//             operation. The scope of a temp is the operation. If a variable
//             with the given name already exists, an error will be given at
//             tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
//           (op "+", $var, $p1)] ->
//          "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
//                argument replaced by the second argument. Raises an error if
//                the first argument does not exist in the intrinsic name.
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
//          version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
//           string, and must be given a type. The type is a stdint.h or
//           NEON intrinsic type as given to (cast).
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
//           The MASK argument is a set of elements. The elements are generated
//           from the two special defs "mask0" and "mask1". "mask0" expands to
//           the lane indices in sequence for ARG0, and "mask1" expands to
//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
//             (shuffle $p0, $p1, mask0) -> $p0
//             (shuffle $p0, $p1, mask1) -> $p1
//
//           or, more usefully, they can be manipulated using the SetTheory
//           operators plus some extra operators defined in the NEON emitter.
//           The operators are described below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
//          A concatenation of the high halves of the input vectors.
def shuffle;

// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and take their normal definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
//       is a width in bits to reverse. The lanes this maps to are determined
//       based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;

def OP_NONE : Operation;
def OP_UNAVAILABLE : Operation {
  let Unavailable = 1;
}

//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//

// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
// a sequence of typespecs.
//
// The name is the base name of the intrinsic, for example "vget_lane". This is
// then mangled by the tblgen backend to add type information ("vget_lane_s16").
//
// A typespec is a sequence of uppercase characters (modifiers) followed by one
// lowercase character. A typespec encodes a particular "base type" of the
// intrinsic.
//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
//
// The string given to an Inst class is a sequence of typespecs. The intrinsic
// is instantiated for every typespec in the sequence. For example "sdQsQd".
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// "modifier" that can change in some way the "base type" of the intrinsic.
//
// The modifier 'd' means "default" and does not modify the base type in any
// way. The available modifiers are given below.
//
// Typespecs
// ---------
// c: char
// s: short
// i: int
// l: long
// k: 128-bit long
// f: float
// h: half-float
// d: double
//
// Typespec modifiers
// ------------------
// S: scalar, only used for function mangling.
// U: unsigned
// Q: 128b
// H: 128b without mangling 'q'
// P: polynomial
//
// Prototype modifiers
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// t: best-fit integer (int/poly args)
// x: signed integer (int/float args)
// u: unsigned integer (int/float args)
// f: float (int args)
// F: double (int args)
// H: half (int args)
// d: default
// g: default, ignore 'Q' size modifier.
// j: default, force 'Q' size modifier.
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
// l: constant uint64
// s: scalar of element type
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
// b: scalar of unsigned integer/long type (int/float args)
// $: scalar of signed integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
// 2,3,4: array of default vectors
// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type

// Every intrinsic subclasses Inst.
class Inst <string n, string p, string t, Operation o> {
  string Name = n;
  string Prototype = p;
  string Types = t;
  string ArchGuard = "";

  Operation Operation = o;
  bit CartesianProductOfTypes = 0;
  bit BigEndianSafe = 0;
  bit isShift = 0;
  bit isScalarShift = 0;
  bit isScalarNarrowShift = 0;
  bit isVCVT_N = 0;
  // For immediate checks: the immediate will be assumed to specify the lane of
  // a Q register. Only used for intrinsics which end up calling polymorphic
  // builtins.
  bit isLaneQ = 0;

  // Certain intrinsics have different names than their representative
  // instructions. This field allows us to handle this correctly when we
  // are generating tests.
  string InstName = "";

  // Certain intrinsics even though they are not a WOpInst or LOpInst,
  // generate a WOpInst/LOpInst instruction (see below for definition
  // of a WOpInst/LOpInst). For testing purposes we need to know
  // this. Ex: vset_lane which outputs vmov instructions.
  bit isHiddenWInst = 0;
  bit isHiddenLInst = 0;
}

// The following instruction classes are implemented via builtins.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}

// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests.
//
// SOpInst: Instruction with signed/unsigned suffix (e.g., "s8",
//          "u8", "p8").
// IOpInst: Instruction with generic integer suffix (e.g., "i8").
// WOpInst: Instruction with bit size only suffix (e.g., "8").
// LOpInst: Logical instruction with no bit size suffix.
// NoTestOpInst: Intrinsic that has no corresponding instruction.
class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
@@ -183,6 +183,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,

  if ((FPU & NeonMode) && HasFullFP16)
    Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1");
  if (HasFullFP16)
    Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1");

  switch (ArchKind) {
  default:
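
With this in place, code (including arm_fp16.h itself, via the ArchGuard in
arm_fp16.td above) can gate on the new macro. A sketch, not taken from the
commit:

#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)
#include <arm_fp16.h>
static inline float16_t square_h(float16_t x) { return vmulh_f16(x, x); }
#endif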
@@ -4101,6 +4101,54 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsh_f16, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vcageh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaleh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcalth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s16_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u16_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s16, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u16, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s16_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u16_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s16_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u16_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s16_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u16_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s16_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u16_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};

#undef NEONMAP0
@@ -6125,6 +6173,58 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_u16_f16:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], Int16Ty);
    return Builder.CreateFPToSI(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_u32_f16:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvth_s32_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], Int32Ty);
    return Builder.CreateFPToSI(Ops[0], Int32Ty);
  }
  case NEON::BI__builtin_neon_vcvth_u64_f16:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvth_s64_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], Int64Ty);
    return Builder.CreateFPToSI(Ops[0], Int64Ty);
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
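
The cases above lower the non-builtin conversions directly to IR casts: the
integer operand is bitcast to the width implied by the builtin's suffix and
then converted with uitofp/sitofp (or fptoui/fptosi in the other direction).
A sketch of the expected lowering, with the assumed IR in comments:

float16_t cvt_u32(uint32_t u) {
  return vcvth_f16_u32(u);
  /* %conv = uitofp i32 %u to half   ; usgn == true -> CreateUIToFP
     ret half %conv                                                 */
}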
@@ -6166,6 +6266,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vceqzd_s64:
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6173,6 +6274,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vcgezd_s64:
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6180,6 +6282,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vclezd_s64:
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6187,6 +6290,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vcgtzd_s64:
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6194,6 +6298,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vcltzd_s64:
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -6246,6 +6351,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
@@ -6383,6 +6508,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                        llvm::VectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F,
      {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
    Value *Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
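
The operand shuffle in the two fma cases is the point to notice: the NEON
intrinsic takes the accumulator first, while llvm.fma takes it last, so
vfmah_f16(a, b, c) becomes fma(b, c, a), i.e. b * c + a (and vfmsh_f16
negates b before the same call). For example:

float16_t fma_demo(float16_t acc, float16_t x, float16_t y) {
  return vfmah_f16(acc, x, y);  /* computes x * y + acc */
}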
@@ -6657,12 +6807,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
@@ -6701,20 +6861,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vminnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vmaxnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
                        Ops, "vrecps");
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
@@ -6730,36 +6901,71 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrndah_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v: {
    Int = Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndmh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndnh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frintn;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Intrinsic::aarch64_neon_frintn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndph_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Intrinsic::trunc;
@@ -6908,6 +7114,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
@@ -6918,6 +7126,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Intrinsic::sqrt;
@@ -116,7 +116,12 @@ set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)

# Generate arm_neon.h
clang_tablegen(arm_neon.h -gen-arm-neon
  -I ${CLANG_SOURCE_DIR}/include/clang/Basic/
  SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_neon.td)
# Generate arm_fp16.h
clang_tablegen(arm_fp16.h -gen-arm-fp16
  -I ${CLANG_SOURCE_DIR}/include/clang/Basic/
  SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_fp16.td)

set(out_files)
foreach( f ${files} ${cuda_wrapper_files} )
@@ -134,6 +139,11 @@ add_custom_command(OUTPUT ${output_dir}/arm_neon.h
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_neon.h ${output_dir}/arm_neon.h
  COMMENT "Copying clang's arm_neon.h...")
list(APPEND out_files ${output_dir}/arm_neon.h)
add_custom_command(OUTPUT ${output_dir}/arm_fp16.h
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h ${output_dir}/arm_fp16.h
  COMMENT "Copying clang's arm_fp16.h...")
list(APPEND out_files ${output_dir}/arm_fp16.h)

add_custom_target(clang-headers ALL DEPENDS ${out_files})
set_target_properties(clang-headers PROPERTIES FOLDER "Misc")
@@ -144,6 +154,12 @@ install(
  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
  DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)

install(
  FILES ${files} ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
  COMPONENT clang-headers
  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
  DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)

install(
  FILES ${cuda_wrapper_files}
  COMPONENT clang-headers
@ -38,6 +38,7 @@ module _Builtin_intrinsics [system] [extern_c] {
  explicit module neon {
    requires neon
    header "arm_neon.h"
    header "arm_fp16.h"
    export *
  }
}
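With arm_fp16.h generated, installed, and exposed through the builtin-intrinsics module above, user code can include it directly. A minimal usage sketch (hypothetical example code; the intrinsic names are taken from the test file later in this commit, and the target flag, e.g. -march=armv8.2-a+fp16, is an assumption about the build invocation):

#include <arm_fp16.h>

/* Average two values entirely in half-precision arithmetic:
   vaddh_f16/vdivh_f16 lower to scalar half-precision fadd/fdiv,
   and vcvth_f16_s32 converts the integer constant 2 to half. */
float16_t fp16_avg(float16_t a, float16_t b) {
  return vdivh_f16(vaddh_f16(a, b), vcvth_f16_s32(2));
}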
@ -1353,6 +1353,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
  switch (BuiltinID) {
#define GET_NEON_OVERLOAD_CHECK
#include "clang/Basic/arm_neon.inc"
#include "clang/Basic/arm_fp16.inc"
#undef GET_NEON_OVERLOAD_CHECK
  }

@ -1404,6 +1405,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
    return false;
#define GET_NEON_IMMEDIATE_CHECK
#include "clang/Basic/arm_neon.inc"
#include "clang/Basic/arm_fp16.inc"
#undef GET_NEON_IMMEDIATE_CHECK
  }
@ -0,0 +1,643 @@
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
// RUN:   -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -mem2reg \
// RUN:   | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_fp16.h>
// CHECK-LABEL: test_vabsh_f16
// CHECK: [[ABS:%.*]] = call half @llvm.aarch64.neon.abs.f16(half %a)
// CHECK: ret half [[ABS]]
float16_t test_vabsh_f16(float16_t a) {
  return vabsh_f16(a);
}

// CHECK-LABEL: test_vceqzh_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq half %a, 0xH0000
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vceqzh_f16(float16_t a) {
  return vceqzh_f16(a);
}

// CHECK-LABEL: test_vcgezh_f16
// CHECK: [[TMP1:%.*]] = fcmp oge half %a, 0xH0000
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcgezh_f16(float16_t a) {
  return vcgezh_f16(a);
}

// CHECK-LABEL: test_vcgtzh_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcgtzh_f16(float16_t a) {
  return vcgtzh_f16(a);
}

// CHECK-LABEL: test_vclezh_f16
// CHECK: [[TMP1:%.*]] = fcmp ole half %a, 0xH0000
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vclezh_f16(float16_t a) {
  return vclezh_f16(a);
}

// CHECK-LABEL: test_vcltzh_f16
// CHECK: [[TMP1:%.*]] = fcmp olt half %a, 0xH0000
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcltzh_f16(float16_t a) {
  return vcltzh_f16(a);
}

// CHECK-LABEL: test_vcvth_f16_s16
// CHECK: [[VCVT:%.*]] = sitofp i16 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_s16(int16_t a) {
  return vcvth_f16_s16(a);
}

// CHECK-LABEL: test_vcvth_f16_s32
// CHECK: [[VCVT:%.*]] = sitofp i32 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_s32(int32_t a) {
  return vcvth_f16_s32(a);
}

// CHECK-LABEL: test_vcvth_f16_s64
// CHECK: [[VCVT:%.*]] = sitofp i64 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_s64(int64_t a) {
  return vcvth_f16_s64(a);
}

// CHECK-LABEL: test_vcvth_f16_u16
// CHECK: [[VCVT:%.*]] = uitofp i16 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_u16(uint16_t a) {
  return vcvth_f16_u16(a);
}

// CHECK-LABEL: test_vcvth_f16_u32
// CHECK: [[VCVT:%.*]] = uitofp i32 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_u32(uint32_t a) {
  return vcvth_f16_u32(a);
}

// CHECK-LABEL: test_vcvth_f16_u64
// CHECK: [[VCVT:%.*]] = uitofp i64 %a to half
// CHECK: ret half [[VCVT]]
float16_t test_vcvth_f16_u64(uint64_t a) {
  return vcvth_f16_u64(a);
}

// CHECK-LABEL: test_vcvth_s16_f16
// CHECK: [[VCVT:%.*]] = fptosi half %a to i16
// CHECK: ret i16 [[VCVT]]
int16_t test_vcvth_s16_f16(float16_t a) {
  return vcvth_s16_f16(a);
}

// CHECK-LABEL: test_vcvth_s32_f16
// CHECK: [[VCVT:%.*]] = fptosi half %a to i32
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvth_s32_f16(float16_t a) {
  return vcvth_s32_f16(a);
}

// CHECK-LABEL: test_vcvth_s64_f16
// CHECK: [[VCVT:%.*]] = fptosi half %a to i64
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvth_s64_f16(float16_t a) {
  return vcvth_s64_f16(a);
}

// CHECK-LABEL: test_vcvth_u16_f16
// CHECK: [[VCVT:%.*]] = fptoui half %a to i16
// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvth_u16_f16(float16_t a) {
  return vcvth_u16_f16(a);
}

// CHECK-LABEL: test_vcvth_u32_f16
// CHECK: [[VCVT:%.*]] = fptoui half %a to i32
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvth_u32_f16(float16_t a) {
  return vcvth_u32_f16(a);
}

// CHECK-LABEL: test_vcvth_u64_f16
// CHECK: [[VCVT:%.*]] = fptoui half %a to i64
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvth_u64_f16(float16_t a) {
  return vcvth_u64_f16(a);
}

// CHECK-LABEL: test_vcvtah_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
int16_t test_vcvtah_s16_f16(float16_t a) {
  return vcvtah_s16_f16(a);
}

// CHECK-LABEL: test_vcvtah_s32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvtah_s32_f16(float16_t a) {
  return vcvtah_s32_f16(a);
}

// CHECK-LABEL: test_vcvtah_s64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvtah_s64_f16(float16_t a) {
  return vcvtah_s64_f16(a);
}

// CHECK-LABEL: test_vcvtah_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvtah_u16_f16(float16_t a) {
  return vcvtah_u16_f16(a);
}

// CHECK-LABEL: test_vcvtah_u32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvtah_u32_f16(float16_t a) {
  return vcvtah_u32_f16(a);
}

// CHECK-LABEL: test_vcvtah_u64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvtah_u64_f16(float16_t a) {
  return vcvtah_u64_f16(a);
}

// CHECK-LABEL: test_vcvtmh_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
int16_t test_vcvtmh_s16_f16(float16_t a) {
  return vcvtmh_s16_f16(a);
}

// CHECK-LABEL: test_vcvtmh_s32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvtmh_s32_f16(float16_t a) {
  return vcvtmh_s32_f16(a);
}

// CHECK-LABEL: test_vcvtmh_s64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvtmh_s64_f16(float16_t a) {
  return vcvtmh_s64_f16(a);
}

// CHECK-LABEL: test_vcvtmh_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvtmh_u16_f16(float16_t a) {
  return vcvtmh_u16_f16(a);
}

// CHECK-LABEL: test_vcvtmh_u32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvtmh_u32_f16(float16_t a) {
  return vcvtmh_u32_f16(a);
}

// CHECK-LABEL: test_vcvtmh_u64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvtmh_u64_f16(float16_t a) {
  return vcvtmh_u64_f16(a);
}

// CHECK-LABEL: test_vcvtnh_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
int16_t test_vcvtnh_s16_f16(float16_t a) {
  return vcvtnh_s16_f16(a);
}

// CHECK-LABEL: test_vcvtnh_s32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvtnh_s32_f16(float16_t a) {
  return vcvtnh_s32_f16(a);
}

// CHECK-LABEL: test_vcvtnh_s64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvtnh_s64_f16(float16_t a) {
  return vcvtnh_s64_f16(a);
}

// CHECK-LABEL: test_vcvtnh_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvtnh_u16_f16(float16_t a) {
  return vcvtnh_u16_f16(a);
}

// CHECK-LABEL: test_vcvtnh_u32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvtnh_u32_f16(float16_t a) {
  return vcvtnh_u32_f16(a);
}

// CHECK-LABEL: test_vcvtnh_u64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvtnh_u64_f16(float16_t a) {
  return vcvtnh_u64_f16(a);
}

// CHECK-LABEL: test_vcvtph_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
int16_t test_vcvtph_s16_f16(float16_t a) {
  return vcvtph_s16_f16(a);
}

// CHECK-LABEL: test_vcvtph_s32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvtph_s32_f16(float16_t a) {
  return vcvtph_s32_f16(a);
}

// CHECK-LABEL: test_vcvtph_s64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvtph_s64_f16(float16_t a) {
  return vcvtph_s64_f16(a);
}

// CHECK-LABEL: test_vcvtph_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvtph_u16_f16(float16_t a) {
  return vcvtph_u16_f16(a);
}

// CHECK-LABEL: test_vcvtph_u32_f16
// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvtph_u32_f16(float16_t a) {
  return vcvtph_u32_f16(a);
}

// CHECK-LABEL: test_vcvtph_u64_f16
// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvtph_u64_f16(float16_t a) {
  return vcvtph_u64_f16(a);
}

// CHECK-LABEL: test_vnegh_f16
// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a
// CHECK: ret half [[NEG]]
float16_t test_vnegh_f16(float16_t a) {
  return vnegh_f16(a);
}

// CHECK-LABEL: test_vrecpeh_f16
// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a)
// CHECK: ret half [[VREC]]
float16_t test_vrecpeh_f16(float16_t a) {
  return vrecpeh_f16(a);
}

// CHECK-LABEL: test_vrecpxh_f16
// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a)
// CHECK: ret half [[VREC]]
float16_t test_vrecpxh_f16(float16_t a) {
  return vrecpxh_f16(a);
}

// CHECK-LABEL: test_vrndh_f16
// CHECK: [[RND:%.*]] = call half @llvm.trunc.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndh_f16(float16_t a) {
  return vrndh_f16(a);
}

// CHECK-LABEL: test_vrndah_f16
// CHECK: [[RND:%.*]] = call half @llvm.round.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndah_f16(float16_t a) {
  return vrndah_f16(a);
}

// CHECK-LABEL: test_vrndih_f16
// CHECK: [[RND:%.*]] = call half @llvm.nearbyint.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndih_f16(float16_t a) {
  return vrndih_f16(a);
}

// CHECK-LABEL: test_vrndmh_f16
// CHECK: [[RND:%.*]] = call half @llvm.floor.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndmh_f16(float16_t a) {
  return vrndmh_f16(a);
}

// CHECK-LABEL: test_vrndnh_f16
// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndnh_f16(float16_t a) {
  return vrndnh_f16(a);
}

// CHECK-LABEL: test_vrndph_f16
// CHECK: [[RND:%.*]] = call half @llvm.ceil.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndph_f16(float16_t a) {
  return vrndph_f16(a);
}

// CHECK-LABEL: test_vrndxh_f16
// CHECK: [[RND:%.*]] = call half @llvm.rint.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndxh_f16(float16_t a) {
  return vrndxh_f16(a);
}

// CHECK-LABEL: test_vrsqrteh_f16
// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrsqrteh_f16(float16_t a) {
  return vrsqrteh_f16(a);
}

// CHECK-LABEL: test_vsqrth_f16
// CHECK: [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a)
// CHECK: ret half [[SQR]]
float16_t test_vsqrth_f16(float16_t a) {
  return vsqrth_f16(a);
}

// CHECK-LABEL: test_vaddh_f16
// CHECK: [[ADD:%.*]] = fadd half %a, %b
// CHECK: ret half [[ADD]]
float16_t test_vaddh_f16(float16_t a, float16_t b) {
  return vaddh_f16(a, b);
}

// CHECK-LABEL: test_vabdh_f16
// CHECK: [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
// CHECK: ret half [[ABD]]
float16_t test_vabdh_f16(float16_t a, float16_t b) {
  return vabdh_f16(a, b);
}

// CHECK-LABEL: test_vcageh_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
uint16_t test_vcageh_f16(float16_t a, float16_t b) {
  return vcageh_f16(a, b);
}

// CHECK-LABEL: test_vcagth_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
uint16_t test_vcagth_f16(float16_t a, float16_t b) {
  return vcagth_f16(a, b);
}

// CHECK-LABEL: test_vcaleh_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
uint16_t test_vcaleh_f16(float16_t a, float16_t b) {
  return vcaleh_f16(a, b);
}

// CHECK-LABEL: test_vcalth_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
uint16_t test_vcalth_f16(float16_t a, float16_t b) {
  return vcalth_f16(a, b);
}

// CHECK-LABEL: test_vceqh_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq half %a, %b
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vceqh_f16(float16_t a, float16_t b) {
  return vceqh_f16(a, b);
}

// CHECK-LABEL: test_vcgeh_f16
// CHECK: [[TMP1:%.*]] = fcmp oge half %a, %b
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcgeh_f16(float16_t a, float16_t b) {
  return vcgeh_f16(a, b);
}

// CHECK-LABEL: test_vcgth_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt half %a, %b
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcgth_f16(float16_t a, float16_t b) {
  return vcgth_f16(a, b);
}

// CHECK-LABEL: test_vcleh_f16
// CHECK: [[TMP1:%.*]] = fcmp ole half %a, %b
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vcleh_f16(float16_t a, float16_t b) {
  return vcleh_f16(a, b);
}

// CHECK-LABEL: test_vclth_f16
// CHECK: [[TMP1:%.*]] = fcmp olt half %a, %b
// CHECK: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vclth_f16(float16_t a, float16_t b) {
  return vclth_f16(a, b);
}
// CHECK-LABEL: test_vcvth_n_f16_s16
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i16(i16 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_s16(int16_t a) {
  return vcvth_n_f16_s16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_f16_s32
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_s32(int32_t a) {
  return vcvth_n_f16_s32(a, 0);
}

// CHECK-LABEL: test_vcvth_n_f16_s64
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_s64(int64_t a) {
  return vcvth_n_f16_s64(a, 0);
}

// CHECK-LABEL: test_vcvth_n_s16_f16
// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxs.i16.f16(half %a, i32 0)
// CHECK: ret i16 [[CVT]]
int16_t test_vcvth_n_s16_f16(float16_t a) {
  return vcvth_n_s16_f16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_s32_f16
// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 0)
// CHECK: ret i32 [[CVT]]
int32_t test_vcvth_n_s32_f16(float16_t a) {
  return vcvth_n_s32_f16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_s64_f16
// CHECK: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 0)
// CHECK: ret i64 [[CVT]]
int64_t test_vcvth_n_s64_f16(float16_t a) {
  return vcvth_n_s64_f16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_f16_u16
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i16(i16 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_u16(uint16_t a) {
  return vcvth_n_f16_u16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_f16_u32
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_u32(uint32_t a) {
  return vcvth_n_f16_u32(a, 0);
}

// CHECK-LABEL: test_vcvth_n_f16_u64
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i64(i64 %a, i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_u64(uint64_t a) {
  return vcvth_n_f16_u64(a, 0);
}

// CHECK-LABEL: test_vcvth_n_u16_f16
// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxu.i16.f16(half %a, i32 0)
// CHECK: ret i16 [[CVT]]
uint16_t test_vcvth_n_u16_f16(float16_t a) {
  return vcvth_n_u16_f16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_u32_f16
// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 0)
// CHECK: ret i32 [[CVT]]
uint32_t test_vcvth_n_u32_f16(float16_t a) {
  return vcvth_n_u32_f16(a, 0);
}

// CHECK-LABEL: test_vcvth_n_u64_f16
// CHECK: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f16(half %a, i32 0)
// CHECK: ret i64 [[CVT]]
uint64_t test_vcvth_n_u64_f16(float16_t a) {
  return vcvth_n_u64_f16(a, 0);
}
// CHECK-LABEL: test_vdivh_f16
// CHECK: [[DIV:%.*]] = fdiv half %a, %b
// CHECK: ret half [[DIV]]
float16_t test_vdivh_f16(float16_t a, float16_t b) {
  return vdivh_f16(a, b);
}

// CHECK-LABEL: test_vmaxh_f16
// CHECK: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b)
// CHECK: ret half [[MAX]]
float16_t test_vmaxh_f16(float16_t a, float16_t b) {
  return vmaxh_f16(a, b);
}

// CHECK-LABEL: test_vmaxnmh_f16
// CHECK: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmaxnm.f16(half %a, half %b)
// CHECK: ret half [[MAX]]
float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
  return vmaxnmh_f16(a, b);
}

// CHECK-LABEL: test_vminh_f16
// CHECK: [[MIN:%.*]] = call half @llvm.aarch64.neon.fmin.f16(half %a, half %b)
// CHECK: ret half [[MIN]]
float16_t test_vminh_f16(float16_t a, float16_t b) {
  return vminh_f16(a, b);
}

// CHECK-LABEL: test_vminnmh_f16
// CHECK: [[MIN:%.*]] = call half @llvm.aarch64.neon.fminnm.f16(half %a, half %b)
// CHECK: ret half [[MIN]]
float16_t test_vminnmh_f16(float16_t a, float16_t b) {
  return vminnmh_f16(a, b);
}

// CHECK-LABEL: test_vmulh_f16
// CHECK: [[MUL:%.*]] = fmul half %a, %b
// CHECK: ret half [[MUL]]
float16_t test_vmulh_f16(float16_t a, float16_t b) {
  return vmulh_f16(a, b);
}

// CHECK-LABEL: test_vmulxh_f16
// CHECK: [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
// CHECK: ret half [[MUL]]
float16_t test_vmulxh_f16(float16_t a, float16_t b) {
  return vmulxh_f16(a, b);
}

// CHECK-LABEL: test_vrecpsh_f16
// CHECK: [[RECPS:%.*]] = call half @llvm.aarch64.neon.frecps.f16(half %a, half %b)
// CHECK: ret half [[RECPS]]
float16_t test_vrecpsh_f16(float16_t a, float16_t b) {
  return vrecpsh_f16(a, b);
}

// CHECK-LABEL: test_vrsqrtsh_f16
// CHECK: [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
// CHECK: ret half [[RSQRTS]]
float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
  return vrsqrtsh_f16(a, b);
}

// CHECK-LABEL: test_vsubh_f16
// CHECK: [[SUB:%.*]] = fsub half %a, %b
// CHECK: ret half [[SUB]]
float16_t test_vsubh_f16(float16_t a, float16_t b) {
  return vsubh_f16(a, b);
}

// CHECK-LABEL: test_vfmah_f16
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
  return vfmah_f16(a, b, c);
}

// CHECK-LABEL: test_vfmsh_f16
// CHECK: [[SUB:%.*]] = fsub half 0xH8000, %b
// CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
// CHECK: ret half [[ADD]]
float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
  return vfmsh_f16(a, b, c);
}
@ -552,7 +552,11 @@ public:
  // run - Emit arm_neon.h
  void run(raw_ostream &o);

  // runFP16 - Emit arm_fp16.h
  void runFP16(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  // and arm_fp16.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
@ -852,6 +856,35 @@ void Type::applyModifier(char Mod) {
    NumVectors = 0;
    Float = true;
    break;
  case 'Y':
    Bitwidth = ElementBitwidth = 16;
    NumVectors = 0;
    Float = true;
    break;
  case 'I':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'L':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = true;
    break;
  case 'U':
    Bitwidth = ElementBitwidth = 32;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
  case 'O':
    Bitwidth = ElementBitwidth = 64;
    NumVectors = 0;
    Float = false;
    Signed = false;
    break;
  case 'f':
    Float = true;
    ElementBitwidth = 32;
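The new modifier letters give the scalar FP16 records names for their scalar operand types: 'Y' is a scalar half, 'I' and 'L' are signed 32- and 64-bit scalars, and 'U' and 'O' are their unsigned counterparts. As an illustration, a record whose prototype returns 'Y' over the signed-integer type list produces declarations of this shape in the generated header (a sketch assembled from the intrinsics exercised by the test file in this commit, not verbatim emitter output):

float16_t vcvth_f16_s16(int16_t a);  /* 'Y' return, 16-bit signed input */
float16_t vcvth_f16_s32(int32_t a);  /* 'Y' return, 32-bit signed input */
float16_t vcvth_f16_s64(int64_t a);  /* 'Y' return, 64-bit signed input */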
@ -1010,7 +1043,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
}

static bool isFloatingPointProtoModifier(char Mod) {
  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
}

std::string Intrinsic::getBuiltinTypeStr() {
@ -2420,12 +2453,125 @@ void NeonEmitter::run(raw_ostream &OS) {
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// runFP16 - Read the records in arm_fp16.td and output arm_fp16.h.
/// arm_fp16.h consists of type definitions and function declarations.
void NeonEmitter::runFP16(raw_ostream &OS) {
  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
        "------------------------------"
        "---===\n"
        " *\n"
        " * Permission is hereby granted, free of charge, to any person "
        "obtaining a copy\n"
        " * of this software and associated documentation files (the "
        "\"Software\"), to deal\n"
        " * in the Software without restriction, including without limitation "
        "the rights\n"
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
        "and/or sell\n"
        " * copies of the Software, and to permit persons to whom the Software "
        "is\n"
        " * furnished to do so, subject to the following conditions:\n"
        " *\n"
        " * The above copyright notice and this permission notice shall be "
        "included in\n"
        " * all copies or substantial portions of the Software.\n"
        " *\n"
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
        "EXPRESS OR\n"
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
        "MERCHANTABILITY,\n"
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
        "SHALL THE\n"
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
        "OTHER\n"
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
        "ARISING FROM,\n"
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
        "DEALINGS IN\n"
        " * THE SOFTWARE.\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "---"
        "---===\n"
        " */\n\n";

  OS << "#ifndef __ARM_FP16_H\n";
  OS << "#define __ARM_FP16_H\n\n";

  OS << "#include <stdint.h>\n\n";

  OS << "typedef __fp16 float16_t;\n";

  OS << "#define __ai static inline __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  SmallVector<Intrinsic *, 128> Defs;
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (auto *R : RV)
    createIntrinsic(R, Defs);

  for (auto *I : Defs)
    I->indexBody();

  std::stable_sort(
      Defs.begin(), Defs.end(),
      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });

  // Only emit a def when its requirements have been met.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  bool MadeProgress = true;
  std::string InGuard;
  while (!Defs.empty() && MadeProgress) {
    MadeProgress = false;

    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
         I != Defs.end(); /*No step*/) {
      bool DependenciesSatisfied = true;
      for (auto *II : (*I)->getDependencies()) {
        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
          DependenciesSatisfied = false;
      }
      if (!DependenciesSatisfied) {
        // Try the next one.
        ++I;
        continue;
      }

      // Emit #endif/#if pair if needed.
      if ((*I)->getGuard() != InGuard) {
        if (!InGuard.empty())
          OS << "#endif\n";
        InGuard = (*I)->getGuard();
        if (!InGuard.empty())
          OS << "#if " << InGuard << "\n";
      }

      // Actually generate the intrinsic code.
      OS << (*I)->generate();

      MadeProgress = true;
      I = Defs.erase(I);
    }
  }
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!InGuard.empty())
    OS << "#endif\n";

  OS << "\n";
  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_FP16_H */\n";
}
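For reference, each record accepted by the loop above is printed by (*I)->generate() as a small always-inline wrapper around the corresponding __builtin, bracketed by its architecture guard. A hedged sketch of the shape of one emitted definition (the builtin name appears in the CGBuiltin.cpp changes above; the exact generated body may differ):

#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)
/* Illustrative only: __ai and float16_t come from the header preamble
   emitted by runFP16; the body is a guess at the generated wrapper. */
__ai float16_t vnegh_f16(float16_t __p0) {
  float16_t __ret;
  __ret = (float16_t) __builtin_neon_vnegh_f16(__p0);
  return __ret;
}
#endif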
namespace clang {

void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}

void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runFP16(OS);
}

void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
@ -52,6 +52,7 @@ enum ActionType {
  GenClangCommentCommandInfo,
  GenClangCommentCommandList,
  GenArmNeon,
  GenArmFP16,
  GenArmNeonSema,
  GenArmNeonTest,
  GenAttrDocs,

@ -139,6 +140,7 @@ cl::opt<ActionType> Action(
                   "Generate list of commands that are used in "
                   "documentation comments"),
        clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
        clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
        clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
                   "Generate ARM NEON sema support for clang"),
        clEnumValN(GenArmNeonTest, "gen-arm-neon-test",

@ -250,6 +252,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
  case GenArmNeon:
    EmitNeon(Records, OS);
    break;
  case GenArmFP16:
    EmitFP16(Records, OS);
    break;
  case GenArmNeonSema:
    EmitNeonSema(Records, OS);
    break;
@ -65,6 +65,7 @@ void EmitClangCommentCommandInfo(RecordKeeper &Records, raw_ostream &OS);
void EmitClangCommentCommandList(RecordKeeper &Records, raw_ostream &OS);

void EmitNeon(RecordKeeper &Records, raw_ostream &OS);
void EmitFP16(RecordKeeper &Records, raw_ostream &OS);
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS);
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS);
void EmitNeon2(RecordKeeper &Records, raw_ostream &OS);