2011-10-06 21:03:08 +08:00
|
|
|
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This tablegen backend is responsible for emitting arm_neon.h, which includes
|
|
|
|
// a declaration and definition of each function specified by the ARM NEON
|
|
|
|
// compiler interface. See ARM document DUI0348B.
|
|
|
|
//
|
|
|
|
// Each NEON instruction is implemented in terms of 1 or more functions which
|
|
|
|
// are suffixed with the element type of the input vectors. Functions may be
|
|
|
|
// implemented in terms of generic vector operations such as +, *, -, etc. or
|
|
|
|
// by calling a __builtin_-prefixed function which will be handled by clang's
|
|
|
|
// CodeGen library.
|
|
|
|
//
|
|
|
|
// Additional validation code can be generated by this file when runHeader() is
|
|
|
|
// called, rather than the normal run() entry point. A complete set of tests
|
|
|
|
// for Neon intrinsics can be generated by calling the runTests() entry point.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2012-06-13 13:12:41 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2011-10-06 21:03:08 +08:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2012-06-13 13:12:41 +08:00
|
|
|
#include "llvm/ADT/StringMap.h"
|
2012-01-17 14:56:22 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2012-06-13 13:12:41 +08:00
|
|
|
#include "llvm/TableGen/Error.h"
|
|
|
|
#include "llvm/TableGen/Record.h"
|
|
|
|
#include "llvm/TableGen/TableGenBackend.h"
|
2011-10-06 21:03:08 +08:00
|
|
|
#include <string>
|
|
|
|
using namespace llvm;
|
|
|
|
|
2012-06-13 13:12:41 +08:00
|
|
|
/// OpKind - One enumerator per "OP_*" operation name. The NeonEmitter
/// constructor fills OpMap with the name -> enumerator mapping.
///
/// Values are implicit and sequential starting at OpNone == 0, so the relative
/// order of the enumerators is preserved exactly.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ
};
|
|
|
|
|
|
|
|
/// ClassKind - Classification attached to an instruction record class. The
/// NeonEmitter constructor fills ClassMap with the record class -> ClassKind
/// mapping (SInst, IInst, WInst and their *OpInst counterparts, LOpInst and
/// NoTestOpInst).
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
|
|
|
|
|
|
|
|
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
///
/// Bit layout of the packed value:
///   bits 0-3 (EltTypeMask)  - the EltType enumerator
///   bit  4   (UnsignedFlag) - set when the element type is unsigned
///   bit  5   (QuadFlag)     - set for a quad vector
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element types. The enumerator values are stored directly in the low bits
  // of Flags, so their order must not change.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-packed flags value.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Pack an element type together with its unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  /// Return the packed flags value.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
class NeonEmitter {
|
|
|
|
RecordKeeper &Records;
|
|
|
|
StringMap<OpKind> OpMap;
|
|
|
|
DenseMap<Record*, ClassKind> ClassMap;
|
|
|
|
|
|
|
|
public:
|
|
|
|
NeonEmitter(RecordKeeper &R) : Records(R) {
|
|
|
|
OpMap["OP_NONE"] = OpNone;
|
|
|
|
OpMap["OP_UNAVAILABLE"] = OpUnavailable;
|
|
|
|
OpMap["OP_ADD"] = OpAdd;
|
|
|
|
OpMap["OP_ADDL"] = OpAddl;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_ADDLHi"] = OpAddlHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_ADDW"] = OpAddw;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_ADDWHi"] = OpAddwHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_SUB"] = OpSub;
|
|
|
|
OpMap["OP_SUBL"] = OpSubl;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_SUBLHi"] = OpSublHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_SUBW"] = OpSubw;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_SUBWHi"] = OpSubwHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MUL"] = OpMul;
|
|
|
|
OpMap["OP_MLA"] = OpMla;
|
|
|
|
OpMap["OP_MLAL"] = OpMlal;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_MULLHi"] = OpMullHi;
|
|
|
|
OpMap["OP_MLALHi"] = OpMlalHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MLS"] = OpMls;
|
|
|
|
OpMap["OP_MLSL"] = OpMlsl;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_MLSLHi"] = OpMlslHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MUL_N"] = OpMulN;
|
|
|
|
OpMap["OP_MLA_N"] = OpMlaN;
|
|
|
|
OpMap["OP_MLS_N"] = OpMlsN;
|
|
|
|
OpMap["OP_MLAL_N"] = OpMlalN;
|
|
|
|
OpMap["OP_MLSL_N"] = OpMlslN;
|
|
|
|
OpMap["OP_MUL_LN"]= OpMulLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_MULX_LN"]= OpMulXLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MULL_LN"] = OpMullLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_MULLHi_LN"] = OpMullHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MLA_LN"]= OpMlaLane;
|
|
|
|
OpMap["OP_MLS_LN"]= OpMlsLane;
|
|
|
|
OpMap["OP_MLAL_LN"] = OpMlalLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_MLSL_LN"] = OpMlslLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_QDMULL_LN"] = OpQDMullLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
|
|
|
|
OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
|
2013-10-04 17:21:17 +08:00
|
|
|
OpMap["OP_FMS_LN"] = OpFMSLane;
|
|
|
|
OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
|
2013-11-06 11:35:53 +08:00
|
|
|
OpMap["OP_TRN1"] = OpTrn1;
|
|
|
|
OpMap["OP_ZIP1"] = OpZip1;
|
|
|
|
OpMap["OP_UZP1"] = OpUzp1;
|
|
|
|
OpMap["OP_TRN2"] = OpTrn2;
|
|
|
|
OpMap["OP_ZIP2"] = OpZip2;
|
|
|
|
OpMap["OP_UZP2"] = OpUzp2;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_EQ"] = OpEq;
|
|
|
|
OpMap["OP_GE"] = OpGe;
|
|
|
|
OpMap["OP_LE"] = OpLe;
|
|
|
|
OpMap["OP_GT"] = OpGt;
|
|
|
|
OpMap["OP_LT"] = OpLt;
|
|
|
|
OpMap["OP_NEG"] = OpNeg;
|
|
|
|
OpMap["OP_NOT"] = OpNot;
|
|
|
|
OpMap["OP_AND"] = OpAnd;
|
|
|
|
OpMap["OP_OR"] = OpOr;
|
|
|
|
OpMap["OP_XOR"] = OpXor;
|
|
|
|
OpMap["OP_ANDN"] = OpAndNot;
|
|
|
|
OpMap["OP_ORN"] = OpOrNot;
|
|
|
|
OpMap["OP_CAST"] = OpCast;
|
|
|
|
OpMap["OP_CONC"] = OpConcat;
|
|
|
|
OpMap["OP_HI"] = OpHi;
|
|
|
|
OpMap["OP_LO"] = OpLo;
|
|
|
|
OpMap["OP_DUP"] = OpDup;
|
|
|
|
OpMap["OP_DUP_LN"] = OpDupLane;
|
|
|
|
OpMap["OP_SEL"] = OpSelect;
|
|
|
|
OpMap["OP_REV16"] = OpRev16;
|
|
|
|
OpMap["OP_REV32"] = OpRev32;
|
|
|
|
OpMap["OP_REV64"] = OpRev64;
|
2013-11-14 10:45:18 +08:00
|
|
|
OpMap["OP_XTN"] = OpXtnHi;
|
|
|
|
OpMap["OP_SQXTUN"] = OpSqxtunHi;
|
|
|
|
OpMap["OP_QXTN"] = OpQxtnHi;
|
|
|
|
OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
|
|
|
|
OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
|
|
|
|
OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_REINT"] = OpReinterpret;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_ADDHNHi"] = OpAddhnHi;
|
|
|
|
OpMap["OP_RADDHNHi"] = OpRAddhnHi;
|
|
|
|
OpMap["OP_SUBHNHi"] = OpSubhnHi;
|
|
|
|
OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_ABDL"] = OpAbdl;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_ABDLHi"] = OpAbdlHi;
|
2012-06-13 13:12:41 +08:00
|
|
|
OpMap["OP_ABA"] = OpAba;
|
|
|
|
OpMap["OP_ABAL"] = OpAbal;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
OpMap["OP_ABALHi"] = OpAbalHi;
|
|
|
|
OpMap["OP_QDMULLHi"] = OpQDMullHi;
|
|
|
|
OpMap["OP_QDMLALHi"] = OpQDMlalHi;
|
|
|
|
OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
|
2013-08-01 17:23:19 +08:00
|
|
|
OpMap["OP_DIV"] = OpDiv;
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
OpMap["OP_LONG_HI"] = OpLongHi;
|
|
|
|
OpMap["OP_NARROW_HI"] = OpNarrowHi;
|
|
|
|
OpMap["OP_MOVL_HI"] = OpMovlHi;
|
2013-11-05 10:05:44 +08:00
|
|
|
OpMap["OP_COPY_LN"] = OpCopyLane;
|
|
|
|
OpMap["OP_COPYQ_LN"] = OpCopyQLane;
|
|
|
|
OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
|
2013-11-16 07:33:31 +08:00
|
|
|
OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
|
|
|
|
OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
|
|
|
|
OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
|
|
|
|
OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
|
|
|
|
OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
|
|
|
|
OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
|
2012-06-13 13:12:41 +08:00
|
|
|
|
|
|
|
Record *SI = R.getClass("SInst");
|
|
|
|
Record *II = R.getClass("IInst");
|
|
|
|
Record *WI = R.getClass("WInst");
|
2013-04-17 05:18:42 +08:00
|
|
|
Record *SOpI = R.getClass("SOpInst");
|
|
|
|
Record *IOpI = R.getClass("IOpInst");
|
|
|
|
Record *WOpI = R.getClass("WOpInst");
|
|
|
|
Record *LOpI = R.getClass("LOpInst");
|
|
|
|
Record *NoTestOpI = R.getClass("NoTestOpInst");
|
|
|
|
|
2012-06-13 13:12:41 +08:00
|
|
|
ClassMap[SI] = ClassS;
|
|
|
|
ClassMap[II] = ClassI;
|
|
|
|
ClassMap[WI] = ClassW;
|
2013-04-17 05:18:42 +08:00
|
|
|
ClassMap[SOpI] = ClassS;
|
|
|
|
ClassMap[IOpI] = ClassI;
|
|
|
|
ClassMap[WOpI] = ClassW;
|
|
|
|
ClassMap[LOpI] = ClassL;
|
|
|
|
ClassMap[NoTestOpI] = ClassNoTest;
|
2012-06-13 13:12:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// run - Emit arm_neon.h.inc
|
|
|
|
void run(raw_ostream &o);
|
|
|
|
|
|
|
|
// runHeader - Emit all the __builtin prototypes used in arm_neon.h
|
|
|
|
void runHeader(raw_ostream &o);
|
|
|
|
|
|
|
|
// runTests - Emit tests for all the Neon intrinsics.
|
|
|
|
void runTests(raw_ostream &o);
|
|
|
|
|
|
|
|
private:
|
2013-08-01 17:23:19 +08:00
|
|
|
void emitIntrinsic(raw_ostream &OS, Record *R,
|
|
|
|
StringMap<ClassKind> &EmittedMap);
|
|
|
|
void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64GenBuiltinDef);
|
|
|
|
void genOverloadTypeCheckCode(raw_ostream &OS,
|
|
|
|
StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64TypeCheck);
|
|
|
|
void genIntrinsicRangeCheckCode(raw_ostream &OS,
|
|
|
|
StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64RangeCheck);
|
|
|
|
void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
|
|
|
|
bool isA64TestGen);
|
2012-06-13 13:12:41 +08:00
|
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
|
|
|
|
/// which each StringRef representing a single type declared in the string.
|
|
|
|
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
|
|
|
|
/// 2xfloat and 4xfloat respectively.
|
|
|
|
static void ParseTypes(Record *r, std::string &s,
|
|
|
|
SmallVectorImpl<StringRef> &TV) {
|
|
|
|
const char *data = s.data();
|
|
|
|
int len = 0;
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
|
2013-08-15 16:26:30 +08:00
|
|
|
if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
|
2013-08-29 15:55:15 +08:00
|
|
|
|| data[len] == 'H' || data[len] == 'S')
|
2011-10-06 21:03:08 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (data[len]) {
|
|
|
|
case 'c':
|
|
|
|
case 's':
|
|
|
|
case 'i':
|
|
|
|
case 'l':
|
|
|
|
case 'h':
|
|
|
|
case 'f':
|
2013-08-01 17:23:19 +08:00
|
|
|
case 'd':
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError(r->getLoc(),
|
2011-10-06 21:03:08 +08:00
|
|
|
"Unexpected letter: " + std::string(data + len, 1));
|
|
|
|
}
|
|
|
|
TV.push_back(StringRef(data, len + 1));
|
|
|
|
data += len + 1;
|
|
|
|
len = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Widen - Convert a type code into the next wider type. char -> short,
|
|
|
|
/// short -> int, etc.
|
|
|
|
static char Widen(const char t) {
|
|
|
|
switch (t) {
|
|
|
|
case 'c':
|
|
|
|
return 's';
|
|
|
|
case 's':
|
|
|
|
return 'i';
|
|
|
|
case 'i':
|
|
|
|
return 'l';
|
|
|
|
case 'h':
|
|
|
|
return 'f';
|
2013-11-14 10:45:18 +08:00
|
|
|
case 'f':
|
|
|
|
return 'd';
|
2012-10-26 00:37:08 +08:00
|
|
|
default:
|
|
|
|
PrintFatalError("unhandled type in widen!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Narrow - Convert a type code into the next smaller type. short -> char,
|
|
|
|
/// float -> half float, etc.
|
|
|
|
static char Narrow(const char t) {
|
|
|
|
switch (t) {
|
|
|
|
case 's':
|
|
|
|
return 'c';
|
|
|
|
case 'i':
|
|
|
|
return 's';
|
|
|
|
case 'l':
|
|
|
|
return 'i';
|
|
|
|
case 'f':
|
|
|
|
return 'h';
|
2013-11-14 10:45:18 +08:00
|
|
|
case 'd':
|
|
|
|
return 'f';
|
2012-10-26 00:37:08 +08:00
|
|
|
default:
|
|
|
|
PrintFatalError("unhandled type in narrow!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
static std::string GetNarrowTypestr(StringRef ty)
|
|
|
|
{
|
|
|
|
std::string s;
|
|
|
|
for (size_t i = 0, end = ty.size(); i < end; i++) {
|
|
|
|
switch (ty[i]) {
|
|
|
|
case 's':
|
|
|
|
s += 'c';
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
s += 's';
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
s += 'i';
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
s += ty[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
/// For a particular StringRef, return the base type code, and whether it has
|
|
|
|
/// the quad-vector, polynomial, or unsigned modifiers set.
|
|
|
|
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
|
|
|
|
unsigned off = 0;
|
2013-08-29 15:55:15 +08:00
|
|
|
// ignore scalar.
|
|
|
|
if (ty[off] == 'S') {
|
|
|
|
++off;
|
|
|
|
}
|
2011-10-06 21:03:08 +08:00
|
|
|
// remember quad.
|
2013-08-15 16:26:30 +08:00
|
|
|
if (ty[off] == 'Q' || ty[off] == 'H') {
|
2011-10-06 21:03:08 +08:00
|
|
|
quad = true;
|
|
|
|
++off;
|
|
|
|
}
|
|
|
|
|
|
|
|
// remember poly.
|
|
|
|
if (ty[off] == 'P') {
|
|
|
|
poly = true;
|
|
|
|
++off;
|
|
|
|
}
|
|
|
|
|
|
|
|
// remember unsigned.
|
|
|
|
if (ty[off] == 'U') {
|
|
|
|
usgn = true;
|
|
|
|
++off;
|
|
|
|
}
|
|
|
|
|
|
|
|
// base type to get the type string for.
|
|
|
|
return ty[off];
|
|
|
|
}
|
|
|
|
|
|
|
|
/// ModType - Transform a type code and its modifiers based on a mod code. The
|
|
|
|
/// mod code definitions may be found at the top of arm_neon.td.
|
|
|
|
static char ModType(const char mod, char type, bool &quad, bool &poly,
|
|
|
|
bool &usgn, bool &scal, bool &cnst, bool &pntr) {
|
|
|
|
switch (mod) {
|
|
|
|
case 't':
|
|
|
|
if (poly) {
|
|
|
|
poly = false;
|
|
|
|
usgn = true;
|
|
|
|
}
|
|
|
|
break;
|
2013-10-30 23:20:07 +08:00
|
|
|
case 'b':
|
|
|
|
scal = true;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'u':
|
|
|
|
usgn = true;
|
|
|
|
poly = false;
|
|
|
|
if (type == 'f')
|
|
|
|
type = 'i';
|
2013-08-01 17:23:19 +08:00
|
|
|
if (type == 'd')
|
|
|
|
type = 'l';
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
2013-11-12 02:04:22 +08:00
|
|
|
case '$':
|
|
|
|
scal = true;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'x':
|
|
|
|
usgn = false;
|
|
|
|
poly = false;
|
|
|
|
if (type == 'f')
|
|
|
|
type = 'i';
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
if (type == 'd')
|
|
|
|
type = 'l';
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
2013-10-09 04:43:46 +08:00
|
|
|
case 'o':
|
|
|
|
scal = true;
|
|
|
|
type = 'd';
|
|
|
|
usgn = false;
|
|
|
|
break;
|
|
|
|
case 'y':
|
|
|
|
scal = true;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'f':
|
|
|
|
if (type == 'h')
|
|
|
|
quad = true;
|
|
|
|
type = 'f';
|
|
|
|
usgn = false;
|
|
|
|
break;
|
|
|
|
case 'g':
|
|
|
|
quad = false;
|
|
|
|
break;
|
2013-11-14 09:57:55 +08:00
|
|
|
case 'B':
|
|
|
|
case 'C':
|
|
|
|
case 'D':
|
2013-10-04 17:21:17 +08:00
|
|
|
case 'j':
|
|
|
|
quad = true;
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'w':
|
|
|
|
type = Widen(type);
|
|
|
|
quad = true;
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
type = Widen(type);
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
type = 'i';
|
|
|
|
scal = true;
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
type = 'l';
|
|
|
|
scal = true;
|
|
|
|
usgn = true;
|
|
|
|
break;
|
2013-10-18 22:03:36 +08:00
|
|
|
case 'z':
|
|
|
|
type = Narrow(type);
|
|
|
|
scal = true;
|
|
|
|
break;
|
2013-10-05 16:22:55 +08:00
|
|
|
case 'r':
|
|
|
|
type = Widen(type);
|
2013-10-18 22:03:36 +08:00
|
|
|
scal = true;
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 's':
|
|
|
|
case 'a':
|
|
|
|
scal = true;
|
|
|
|
break;
|
|
|
|
case 'k':
|
|
|
|
quad = true;
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
cnst = true;
|
|
|
|
case 'p':
|
|
|
|
pntr = true;
|
|
|
|
scal = true;
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
|
|
type = Narrow(type);
|
|
|
|
if (type == 'h')
|
|
|
|
quad = false;
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case 'q':
|
|
|
|
type = Narrow(type);
|
|
|
|
quad = true;
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'e':
|
|
|
|
type = Narrow(type);
|
|
|
|
usgn = true;
|
|
|
|
break;
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
case 'm':
|
|
|
|
type = Narrow(type);
|
|
|
|
quad = false;
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
2013-11-14 09:57:55 +08:00
|
|
|
/// IsMultiVecProto - Return true if the prototype modifier \p p is one of the
/// multi-vector codes: '2', '3', '4' or 'B', 'C', 'D'.
static bool IsMultiVecProto(const char p) {
  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
/// TypeString - for a modifier and type, generate the name of the typedef for
|
|
|
|
/// that type. QUc -> uint8x8_t.
|
|
|
|
static std::string TypeString(const char mod, StringRef typestr) {
|
|
|
|
bool quad = false;
|
|
|
|
bool poly = false;
|
|
|
|
bool usgn = false;
|
|
|
|
bool scal = false;
|
|
|
|
bool cnst = false;
|
|
|
|
bool pntr = false;
|
|
|
|
|
|
|
|
if (mod == 'v')
|
|
|
|
return "void";
|
|
|
|
if (mod == 'i')
|
|
|
|
return "int";
|
|
|
|
|
|
|
|
// base type to get the type string for.
|
|
|
|
char type = ClassifyType(typestr, quad, poly, usgn);
|
|
|
|
|
|
|
|
// Based on the modifying character, change the type and width if necessary.
|
|
|
|
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
|
|
|
|
|
|
|
|
SmallString<128> s;
|
|
|
|
|
|
|
|
if (usgn)
|
|
|
|
s.push_back('u');
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case 'c':
|
|
|
|
s += poly ? "poly8" : "int8";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x16" : "x8";
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
s += poly ? "poly16" : "int16";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x8" : "x4";
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
s += "int32";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x4" : "x2";
|
|
|
|
break;
|
|
|
|
case 'l':
|
2013-11-14 11:29:16 +08:00
|
|
|
s += (poly && !usgn)? "poly64" : "int64";
|
2011-10-06 21:03:08 +08:00
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x2" : "x1";
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
|
|
s += "float16";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x8" : "x4";
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
s += "float32";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x4" : "x2";
|
|
|
|
break;
|
2013-08-01 17:23:19 +08:00
|
|
|
case 'd':
|
|
|
|
s += "float64";
|
|
|
|
if (scal)
|
|
|
|
break;
|
|
|
|
s += quad ? "x2" : "x1";
|
|
|
|
break;
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError("unhandled type!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '2' || mod == 'B')
|
2011-10-06 21:03:08 +08:00
|
|
|
s += "x2";
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '3' || mod == 'C')
|
2011-10-06 21:03:08 +08:00
|
|
|
s += "x3";
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '4' || mod == 'D')
|
2011-10-06 21:03:08 +08:00
|
|
|
s += "x4";
|
|
|
|
|
|
|
|
// Append _t, finishing the type string typedef type.
|
|
|
|
s += "_t";
|
|
|
|
|
|
|
|
if (cnst)
|
|
|
|
s += " const";
|
|
|
|
|
|
|
|
if (pntr)
|
|
|
|
s += " *";
|
|
|
|
|
|
|
|
return s.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// BuiltinTypeString - for a modifier and type, generate the clang
|
|
|
|
/// BuiltinsARM.def prototype code for the function. See the top of clang's
|
|
|
|
/// Builtins.def for a description of the type strings.
|
|
|
|
static std::string BuiltinTypeString(const char mod, StringRef typestr,
|
|
|
|
ClassKind ck, bool ret) {
|
|
|
|
bool quad = false;
|
|
|
|
bool poly = false;
|
|
|
|
bool usgn = false;
|
|
|
|
bool scal = false;
|
|
|
|
bool cnst = false;
|
|
|
|
bool pntr = false;
|
|
|
|
|
|
|
|
if (mod == 'v')
|
|
|
|
return "v"; // void
|
|
|
|
if (mod == 'i')
|
|
|
|
return "i"; // int
|
|
|
|
|
|
|
|
// base type to get the type string for.
|
|
|
|
char type = ClassifyType(typestr, quad, poly, usgn);
|
|
|
|
|
|
|
|
// Based on the modifying character, change the type and width if necessary.
|
|
|
|
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
|
|
|
|
|
|
|
|
// All pointers are void* pointers. Change type to 'v' now.
|
|
|
|
if (pntr) {
|
|
|
|
usgn = false;
|
|
|
|
poly = false;
|
|
|
|
type = 'v';
|
|
|
|
}
|
|
|
|
// Treat half-float ('h') types as unsigned short ('s') types.
|
|
|
|
if (type == 'h') {
|
|
|
|
type = 's';
|
|
|
|
usgn = true;
|
|
|
|
}
|
2013-10-04 17:21:17 +08:00
|
|
|
usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
|
|
|
|
scal && type != 'f' && type != 'd');
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
if (scal) {
|
|
|
|
SmallString<128> s;
|
|
|
|
|
|
|
|
if (usgn)
|
|
|
|
s.push_back('U');
|
|
|
|
else if (type == 'c')
|
|
|
|
s.push_back('S'); // make chars explicitly signed
|
|
|
|
|
|
|
|
if (type == 'l') // 64-bit long
|
|
|
|
s += "LLi";
|
|
|
|
else
|
|
|
|
s.push_back(type);
|
|
|
|
|
|
|
|
if (cnst)
|
|
|
|
s.push_back('C');
|
|
|
|
if (pntr)
|
|
|
|
s.push_back('*');
|
|
|
|
return s.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since the return value must be one type, return a vector type of the
|
|
|
|
// appropriate width which we will bitcast. An exception is made for
|
|
|
|
// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
|
|
|
|
// fashion, storing them to a pointer arg.
|
|
|
|
if (ret) {
|
2013-11-14 09:57:55 +08:00
|
|
|
if (IsMultiVecProto(mod))
|
2011-10-06 21:03:08 +08:00
|
|
|
return "vv*"; // void result with void* first argument
|
|
|
|
if (mod == 'f' || (ck != ClassB && type == 'f'))
|
|
|
|
return quad ? "V4f" : "V2f";
|
2013-10-04 17:21:17 +08:00
|
|
|
if (ck != ClassB && type == 'd')
|
|
|
|
return quad ? "V2d" : "V1d";
|
2011-10-06 21:03:08 +08:00
|
|
|
if (ck != ClassB && type == 's')
|
|
|
|
return quad ? "V8s" : "V4s";
|
|
|
|
if (ck != ClassB && type == 'i')
|
|
|
|
return quad ? "V4i" : "V2i";
|
|
|
|
if (ck != ClassB && type == 'l')
|
|
|
|
return quad ? "V2LLi" : "V1LLi";
|
|
|
|
|
|
|
|
return quad ? "V16Sc" : "V8Sc";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Non-return array types are passed as individual vectors.
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '2' || mod == 'B')
|
2011-10-06 21:03:08 +08:00
|
|
|
return quad ? "V16ScV16Sc" : "V8ScV8Sc";
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '3' || mod == 'C')
|
2011-10-06 21:03:08 +08:00
|
|
|
return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
|
2013-11-14 09:57:55 +08:00
|
|
|
if (mod == '4' || mod == 'D')
|
2011-10-06 21:03:08 +08:00
|
|
|
return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
|
|
|
|
|
|
|
|
if (mod == 'f' || (ck != ClassB && type == 'f'))
|
|
|
|
return quad ? "V4f" : "V2f";
|
2013-10-04 17:21:17 +08:00
|
|
|
if (ck != ClassB && type == 'd')
|
|
|
|
return quad ? "V2d" : "V1d";
|
2011-10-06 21:03:08 +08:00
|
|
|
if (ck != ClassB && type == 's')
|
|
|
|
return quad ? "V8s" : "V4s";
|
|
|
|
if (ck != ClassB && type == 'i')
|
|
|
|
return quad ? "V4i" : "V2i";
|
|
|
|
if (ck != ClassB && type == 'l')
|
|
|
|
return quad ? "V2LLi" : "V1LLi";
|
|
|
|
|
|
|
|
return quad ? "V16Sc" : "V8Sc";
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:07:30 +08:00
|
|
|
/// InstructionTypeCode - Computes the ARM argument character code and
|
|
|
|
/// quad status for a specific type string and ClassKind.
|
|
|
|
static void InstructionTypeCode(const StringRef &typeStr,
|
|
|
|
const ClassKind ck,
|
|
|
|
bool &quad,
|
|
|
|
std::string &typeCode) {
|
2011-10-06 21:03:08 +08:00
|
|
|
bool poly = false;
|
|
|
|
bool usgn = false;
|
2013-04-17 06:07:30 +08:00
|
|
|
char type = ClassifyType(typeStr, quad, poly, usgn);
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case 'c':
|
|
|
|
switch (ck) {
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
|
|
|
|
case ClassI: typeCode = "i8"; break;
|
|
|
|
case ClassW: typeCode = "8"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
switch (ck) {
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
|
|
|
|
case ClassI: typeCode = "i16"; break;
|
|
|
|
case ClassW: typeCode = "16"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
switch (ck) {
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassS: typeCode = usgn ? "u32" : "s32"; break;
|
|
|
|
case ClassI: typeCode = "i32"; break;
|
|
|
|
case ClassW: typeCode = "32"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
switch (ck) {
|
2013-11-14 11:29:16 +08:00
|
|
|
case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassI: typeCode = "i64"; break;
|
|
|
|
case ClassW: typeCode = "64"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
|
|
switch (ck) {
|
|
|
|
case ClassS:
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassI: typeCode = "f16"; break;
|
|
|
|
case ClassW: typeCode = "16"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
switch (ck) {
|
|
|
|
case ClassS:
|
2013-04-17 06:07:30 +08:00
|
|
|
case ClassI: typeCode = "f32"; break;
|
|
|
|
case ClassW: typeCode = "32"; break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
break;
|
2013-08-01 17:23:19 +08:00
|
|
|
case 'd':
|
|
|
|
switch (ck) {
|
|
|
|
case ClassS:
|
|
|
|
case ClassI:
|
|
|
|
typeCode += "f64";
|
|
|
|
break;
|
|
|
|
case ClassW:
|
|
|
|
PrintFatalError("unhandled type!");
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError("unhandled type!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
2013-04-17 06:07:30 +08:00
|
|
|
}
|
|
|
|
|
2013-08-29 15:55:15 +08:00
|
|
|
/// Insert_BHSD_Suffix - For a scalar type string (one starting with 'S'),
/// return the size suffix character matching its base type: 'b' for 'c',
/// 'h' for 's', 's' for 'i'/'f' and 'd' for 'l'/'d'.  Returns 0 when the type
/// string does not start with 'S', has no base type character after its
/// modifiers, or has a base type without a suffix.
static char Insert_BHSD_Suffix(StringRef typestr) {
  const size_t len = typestr.size();

  // Only 'S'-prefixed type strings get a suffix.  Checking the length first
  // avoids indexing an empty string.
  if (len == 0 || typestr[0] != 'S')
    return 0;

  // Skip the 'Q', 'H', 'P' and 'U' modifier characters that may precede the
  // base type, without running off the end of the string.
  size_t off = 1;
  while (off < len && (typestr[off] == 'Q' || typestr[off] == 'H' ||
                       typestr[off] == 'P' || typestr[off] == 'U'))
    ++off;
  if (off == len)
    return 0;

  switch (typestr[off]) {
  case 'c':
    return 'b';
  case 's':
    return 'h';
  case 'i':
  case 'f':
    return 's';
  case 'l':
  case 'd':
    return 'd';
  default:
    break;
  }
  return 0;
}
|
|
|
|
|
2013-04-17 06:07:30 +08:00
|
|
|
/// MangleName - Append a type or width suffix to a base neon function name,
|
2013-08-15 16:26:30 +08:00
|
|
|
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
|
|
|
|
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
|
2013-08-29 15:55:15 +08:00
|
|
|
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
|
2013-04-17 06:07:30 +08:00
|
|
|
static std::string MangleName(const std::string &name, StringRef typestr,
|
|
|
|
ClassKind ck) {
|
2013-11-14 10:45:18 +08:00
|
|
|
if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
|
2013-04-17 06:07:30 +08:00
|
|
|
return name;
|
|
|
|
|
|
|
|
bool quad = false;
|
|
|
|
std::string typeCode = "";
|
|
|
|
|
|
|
|
InstructionTypeCode(typestr, ck, quad, typeCode);
|
|
|
|
|
|
|
|
std::string s = name;
|
|
|
|
|
|
|
|
if (typeCode.size() > 0) {
|
|
|
|
s += "_" + typeCode;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
if (ck == ClassB)
|
|
|
|
s += "_v";
|
|
|
|
|
|
|
|
// Insert a 'q' before the first '_' character so that it ends up before
|
|
|
|
// _lane or _n on vector-scalar operations.
|
2013-08-29 15:55:15 +08:00
|
|
|
if (typestr.find("Q") != StringRef::npos) {
|
2013-08-15 16:26:30 +08:00
|
|
|
size_t pos = s.find('_');
|
|
|
|
s = s.insert(pos, "q");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
2013-08-29 15:55:15 +08:00
|
|
|
char ins = Insert_BHSD_Suffix(typestr);
|
|
|
|
if(ins){
|
|
|
|
size_t pos = s.find('_');
|
|
|
|
s = s.insert(pos, &ins, 1);
|
|
|
|
}
|
2013-04-17 07:00:26 +08:00
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2013-04-17 07:00:26 +08:00
|
|
|
static void PreprocessInstruction(const StringRef &Name,
|
|
|
|
const std::string &InstName,
|
|
|
|
std::string &Prefix,
|
|
|
|
bool &HasNPostfix,
|
|
|
|
bool &HasLanePostfix,
|
|
|
|
bool &HasDupPostfix,
|
|
|
|
bool &IsSpecialVCvt,
|
|
|
|
size_t &TBNumber) {
|
|
|
|
// All of our instruction name fields from arm_neon.td are of the form
|
|
|
|
// <instructionname>_...
|
|
|
|
// Thus we grab our instruction name via computation of said Prefix.
|
|
|
|
const size_t PrefixEnd = Name.find_first_of('_');
|
|
|
|
// If InstName is passed in, we use that instead of our name Prefix.
|
|
|
|
Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
|
|
|
|
|
|
|
|
const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
|
|
|
|
|
|
|
|
HasNPostfix = Postfix.count("_n");
|
|
|
|
HasLanePostfix = Postfix.count("_lane");
|
|
|
|
HasDupPostfix = Postfix.count("_dup");
|
|
|
|
IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
|
|
|
|
|
|
|
|
if (InstName.compare("vtbl") == 0 ||
|
|
|
|
InstName.compare("vtbx") == 0) {
|
|
|
|
// If we have a vtblN/vtbxN instruction, use the instruction's ASCII
|
|
|
|
// encoding to get its true value.
|
|
|
|
TBNumber = Name[Name.size()-1] - 48;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
|
|
|
|
/// extracted, generate a FileCheck pattern for a Load Or Store
|
|
|
|
static void
|
|
|
|
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
|
|
|
|
const std::string& OutTypeCode,
|
|
|
|
const bool &IsQuad,
|
|
|
|
const bool &HasDupPostfix,
|
|
|
|
const bool &HasLanePostfix,
|
|
|
|
const size_t Count,
|
|
|
|
std::string &RegisterSuffix) {
|
|
|
|
const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
|
|
|
|
// If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
|
|
|
|
// will output a series of v{ld,st}1s, so we have to handle it specially.
|
|
|
|
if ((Count == 3 || Count == 4) && IsQuad) {
|
|
|
|
RegisterSuffix += "{";
|
|
|
|
for (size_t i = 0; i < Count; i++) {
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}";
|
|
|
|
if (HasDupPostfix) {
|
|
|
|
RegisterSuffix += "[]";
|
|
|
|
}
|
|
|
|
if (HasLanePostfix) {
|
|
|
|
RegisterSuffix += "[{{[0-9]+}}]";
|
|
|
|
}
|
|
|
|
if (i < Count-1) {
|
|
|
|
RegisterSuffix += ", ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
RegisterSuffix += "}";
|
|
|
|
} else {
|
|
|
|
|
|
|
|
// Handle normal loads and stores.
|
|
|
|
RegisterSuffix += "{";
|
|
|
|
for (size_t i = 0; i < Count; i++) {
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}";
|
|
|
|
if (HasDupPostfix) {
|
|
|
|
RegisterSuffix += "[]";
|
|
|
|
}
|
|
|
|
if (HasLanePostfix) {
|
|
|
|
RegisterSuffix += "[{{[0-9]+}}]";
|
|
|
|
}
|
|
|
|
if (IsQuad && !HasLanePostfix) {
|
|
|
|
RegisterSuffix += ", d{{[0-9]+}}";
|
|
|
|
if (HasDupPostfix) {
|
|
|
|
RegisterSuffix += "[]";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i < Count-1) {
|
|
|
|
RegisterSuffix += ", ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
RegisterSuffix += "}, [r{{[0-9]+}}";
|
|
|
|
|
|
|
|
// We only include the alignment hint if we have a vld1.*64 or
|
|
|
|
// a dup/lane instruction.
|
|
|
|
if (IsLDSTOne) {
|
|
|
|
if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
|
2013-06-25 05:25:37 +08:00
|
|
|
RegisterSuffix += ":" + OutTypeCode;
|
2013-04-17 07:00:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
RegisterSuffix += "]";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
|
|
|
|
const bool &HasNPostfix) {
|
|
|
|
return (NameRef.count("vmla") ||
|
|
|
|
NameRef.count("vmlal") ||
|
|
|
|
NameRef.count("vmlsl") ||
|
|
|
|
NameRef.count("vmull") ||
|
|
|
|
NameRef.count("vqdmlal") ||
|
|
|
|
NameRef.count("vqdmlsl") ||
|
|
|
|
NameRef.count("vqdmulh") ||
|
|
|
|
NameRef.count("vqdmull") ||
|
|
|
|
NameRef.count("vqrdmulh")) && HasNPostfix;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
|
|
|
|
const bool &HasLanePostfix) {
|
|
|
|
return (NameRef.count("vmla") ||
|
|
|
|
NameRef.count("vmls") ||
|
|
|
|
NameRef.count("vmlal") ||
|
|
|
|
NameRef.count("vmlsl") ||
|
|
|
|
(NameRef.count("vmul") && NameRef.size() == 3)||
|
|
|
|
NameRef.count("vqdmlal") ||
|
|
|
|
NameRef.count("vqdmlsl") ||
|
|
|
|
NameRef.count("vqdmulh") ||
|
|
|
|
NameRef.count("vqrdmulh")) && HasLanePostfix;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
|
|
|
|
const bool &HasLanePostfix,
|
|
|
|
const bool &IsQuad) {
|
|
|
|
const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
|
|
|
|
&& IsQuad;
|
|
|
|
const bool IsVMull = NameRef.count("mull") && !IsQuad;
|
|
|
|
return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
|
|
|
|
const std::string &Proto,
|
|
|
|
const bool &HasNPostfix,
|
|
|
|
const bool &IsQuad,
|
|
|
|
const bool &HasLanePostfix,
|
|
|
|
const bool &HasDupPostfix,
|
|
|
|
std::string &NormedProto) {
|
|
|
|
// Handle generic case.
|
|
|
|
const StringRef NameRef(Name);
|
|
|
|
for (size_t i = 0, end = Proto.size(); i < end; i++) {
|
|
|
|
switch (Proto[i]) {
|
|
|
|
case 'u':
|
|
|
|
case 'f':
|
|
|
|
case 'd':
|
|
|
|
case 's':
|
|
|
|
case 'x':
|
|
|
|
case 't':
|
|
|
|
case 'n':
|
|
|
|
NormedProto += IsQuad? 'q' : 'd';
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
case 'k':
|
|
|
|
NormedProto += 'q';
|
|
|
|
break;
|
|
|
|
case 'g':
|
2013-10-04 17:21:17 +08:00
|
|
|
case 'j':
|
2013-04-17 07:00:26 +08:00
|
|
|
case 'h':
|
|
|
|
case 'e':
|
|
|
|
NormedProto += 'd';
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
NormedProto += HasLanePostfix? 'a' : 'i';
|
|
|
|
break;
|
|
|
|
case 'a':
|
|
|
|
if (HasLanePostfix) {
|
|
|
|
NormedProto += 'a';
|
|
|
|
} else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
|
|
|
|
NormedProto += IsQuad? 'q' : 'd';
|
|
|
|
} else {
|
|
|
|
NormedProto += 'i';
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle Special Cases.
|
|
|
|
const bool IsNotVExt = !NameRef.count("vext");
|
|
|
|
const bool IsVPADAL = NameRef.count("vpadal");
|
|
|
|
const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
|
|
|
|
HasLanePostfix);
|
|
|
|
const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
|
|
|
|
IsQuad);
|
|
|
|
|
|
|
|
if (IsSpecialLaneMul) {
|
|
|
|
// If
|
|
|
|
NormedProto[2] = NormedProto[3];
|
|
|
|
NormedProto.erase(3);
|
|
|
|
} else if (NormedProto.size() == 4 &&
|
|
|
|
NormedProto[0] == NormedProto[1] &&
|
|
|
|
IsNotVExt) {
|
|
|
|
// If NormedProto.size() == 4 and the first two proto characters are the
|
|
|
|
// same, ignore the first.
|
|
|
|
NormedProto = NormedProto.substr(1, 3);
|
|
|
|
} else if (Is5OpLaneAccum) {
|
|
|
|
// If we have a 5 op lane accumulator operation, we take characters 1,2,4
|
|
|
|
std::string tmp = NormedProto.substr(1,2);
|
|
|
|
tmp += NormedProto[4];
|
|
|
|
NormedProto = tmp;
|
|
|
|
} else if (IsVPADAL) {
|
|
|
|
// If we have VPADAL, ignore the first character.
|
|
|
|
NormedProto = NormedProto.substr(0, 2);
|
|
|
|
} else if (NameRef.count("vdup") && NormedProto.size() > 2) {
|
|
|
|
// If our instruction is a dup instruction, keep only the first and
|
|
|
|
// last characters.
|
|
|
|
std::string tmp = "";
|
|
|
|
tmp += NormedProto[0];
|
|
|
|
tmp += NormedProto[NormedProto.size()-1];
|
|
|
|
NormedProto = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// GenerateRegisterCheckPatterns - Given a bunch of data we have
|
|
|
|
/// extracted, generate a FileCheck pattern to check that an
|
|
|
|
/// instruction's arguments are correct.
|
|
|
|
static void GenerateRegisterCheckPattern(const std::string &Name,
|
|
|
|
const std::string &Proto,
|
|
|
|
const std::string &OutTypeCode,
|
|
|
|
const bool &HasNPostfix,
|
|
|
|
const bool &IsQuad,
|
|
|
|
const bool &HasLanePostfix,
|
|
|
|
const bool &HasDupPostfix,
|
|
|
|
const size_t &TBNumber,
|
|
|
|
std::string &RegisterSuffix) {
|
|
|
|
|
|
|
|
RegisterSuffix = "";
|
|
|
|
|
|
|
|
const StringRef NameRef(Name);
|
|
|
|
const StringRef ProtoRef(Proto);
|
|
|
|
|
|
|
|
if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
|
|
|
|
const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
|
|
|
|
|
|
|
|
if (IsLoadStore) {
|
|
|
|
// Grab N value from v{ld,st}N using its ascii representation.
|
|
|
|
const size_t Count = NameRef[3] - 48;
|
|
|
|
|
|
|
|
GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
|
|
|
|
HasDupPostfix, HasLanePostfix,
|
|
|
|
Count, RegisterSuffix);
|
|
|
|
} else if (IsTBXOrTBL) {
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}, {";
|
|
|
|
for (size_t i = 0; i < TBNumber-1; i++) {
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}, ";
|
|
|
|
}
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
|
|
|
|
} else {
|
|
|
|
// Handle a normal instruction.
|
|
|
|
if (NameRef.count("vget") || NameRef.count("vset"))
|
|
|
|
return;
|
|
|
|
|
|
|
|
// We first normalize our proto, since we only need to emit 4
|
|
|
|
// different types of checks, yet have more than 4 proto types
|
|
|
|
// that map onto those 4 patterns.
|
|
|
|
std::string NormalizedProto("");
|
|
|
|
NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
|
|
|
|
HasLanePostfix, HasDupPostfix,
|
|
|
|
NormalizedProto);
|
|
|
|
|
|
|
|
for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
|
|
|
|
const char &c = NormalizedProto[i];
|
|
|
|
switch (c) {
|
|
|
|
case 'q':
|
|
|
|
RegisterSuffix += "q{{[0-9]+}}, ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'd':
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}, ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
RegisterSuffix += "#{{[0-9]+}}, ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'a':
|
|
|
|
RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove extra ", ".
|
|
|
|
RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// GenerateChecksForIntrinsic - Given a specific instruction name +
|
|
|
|
/// typestr + class kind, generate the proper set of FileCheck
|
|
|
|
/// Patterns to check for. We could just return a string, but instead
|
|
|
|
/// use a vector since it provides us with the extra flexibility of
|
|
|
|
/// emitting multiple checks, which comes in handy for certain cases
|
|
|
|
/// like mla where we want to check for 2 different instructions.
|
|
|
|
static void GenerateChecksForIntrinsic(const std::string &Name,
|
|
|
|
const std::string &Proto,
|
|
|
|
StringRef &OutTypeStr,
|
|
|
|
StringRef &InTypeStr,
|
|
|
|
ClassKind Ck,
|
|
|
|
const std::string &InstName,
|
|
|
|
bool IsHiddenLOp,
|
|
|
|
std::vector<std::string>& Result) {
|
|
|
|
|
|
|
|
// If Ck is a ClassNoTest instruction, just return so no test is
|
|
|
|
// emitted.
|
|
|
|
if(Ck == ClassNoTest)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (Name == "vcvt_f32_f16") {
|
|
|
|
Result.push_back("vcvt.f32.f16");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Now we preprocess our instruction given the data we have to get the
|
|
|
|
// data that we need.
|
|
|
|
// Create a StringRef for String Manipulation of our Name.
|
|
|
|
const StringRef NameRef(Name);
|
|
|
|
// Instruction Prefix.
|
|
|
|
std::string Prefix;
|
|
|
|
// The type code for our out type string.
|
|
|
|
std::string OutTypeCode;
|
|
|
|
// To handle our different cases, we need to check for different postfixes.
|
|
|
|
// Is our instruction a quad instruction.
|
|
|
|
bool IsQuad = false;
|
|
|
|
// Our instruction is of the form <instructionname>_n.
|
|
|
|
bool HasNPostfix = false;
|
|
|
|
// Our instruction is of the form <instructionname>_lane.
|
|
|
|
bool HasLanePostfix = false;
|
|
|
|
// Our instruction is of the form <instructionname>_dup.
|
|
|
|
bool HasDupPostfix = false;
|
|
|
|
// Our instruction is a vcvt instruction which requires special handling.
|
|
|
|
bool IsSpecialVCvt = false;
|
|
|
|
// If we have a vtbxN or vtblN instruction, this is set to N.
|
|
|
|
size_t TBNumber = -1;
|
|
|
|
// Register Suffix
|
|
|
|
std::string RegisterSuffix;
|
|
|
|
|
|
|
|
PreprocessInstruction(NameRef, InstName, Prefix,
|
|
|
|
HasNPostfix, HasLanePostfix, HasDupPostfix,
|
|
|
|
IsSpecialVCvt, TBNumber);
|
|
|
|
|
|
|
|
InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
|
|
|
|
GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
|
|
|
|
HasLanePostfix, HasDupPostfix, TBNumber,
|
|
|
|
RegisterSuffix);
|
|
|
|
|
|
|
|
// In the following section, we handle a bunch of special cases. You can tell
|
|
|
|
// a special case by the fact we are returning early.
|
|
|
|
|
|
|
|
// If our instruction is a logical instruction without postfix or a
|
|
|
|
// hidden LOp just return the current Prefix.
|
|
|
|
if (Ck == ClassL || IsHiddenLOp) {
|
|
|
|
Result.push_back(Prefix + " " + RegisterSuffix);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have a vmov, due to the many different cases, some of which
|
|
|
|
// vary within the different intrinsics generated for a single
|
|
|
|
// instruction type, just output a vmov. (e.g. given an instruction
|
|
|
|
// A, A.u32 might be vmov and A.u8 might be vmov.8).
|
|
|
|
//
|
|
|
|
// FIXME: Maybe something can be done about this. The two cases that we care
|
|
|
|
// about are vmov as an LType and vmov as a WType.
|
|
|
|
if (Prefix == "vmov") {
|
|
|
|
Result.push_back(Prefix + " " + RegisterSuffix);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// In the following section, we handle special cases.
|
|
|
|
|
|
|
|
if (OutTypeCode == "64") {
|
|
|
|
// If we have a 64 bit vdup/vext and are handling an uint64x1_t
|
|
|
|
// type, the intrinsic will be optimized away, so just return
|
|
|
|
// nothing. On the other hand if we are handling an uint64x2_t
|
|
|
|
// (i.e. quad instruction), vdup/vmov instructions should be
|
|
|
|
// emitted.
|
|
|
|
if (Prefix == "vdup" || Prefix == "vext") {
|
|
|
|
if (IsQuad) {
|
|
|
|
Result.push_back("{{vmov|vdup}}");
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
|
|
|
|
// multiple register operands.
|
|
|
|
bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
|
|
|
|
|| Prefix == "vld4";
|
|
|
|
bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
|
|
|
|
|| Prefix == "vst4";
|
|
|
|
if (MultiLoadPrefix || MultiStorePrefix) {
|
|
|
|
Result.push_back(NameRef.slice(0, 3).str() + "1.64");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
|
|
|
|
// emitting said instructions. So return a check for
|
|
|
|
// vldr/vstr/vmov/str instead.
|
|
|
|
if (HasLanePostfix || HasDupPostfix) {
|
|
|
|
if (Prefix == "vst1") {
|
|
|
|
Result.push_back("{{str|vstr|vmov}}");
|
|
|
|
return;
|
|
|
|
} else if (Prefix == "vld1") {
|
|
|
|
Result.push_back("{{ldr|vldr|vmov}}");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
|
|
|
|
// sometimes disassembled as vtrn.32. We use a regex to handle both
|
|
|
|
// cases.
|
|
|
|
if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
|
|
|
|
Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Currently on most ARM processors, we do not use vmla/vmls for
|
|
|
|
// quad floating point operations. Instead we output vmul + vadd. So
|
|
|
|
// check if we have one of those instructions and just output a
|
|
|
|
// check for vmul.
|
|
|
|
if (OutTypeCode == "f32") {
|
|
|
|
if (Prefix == "vmls") {
|
|
|
|
Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
|
|
|
|
Result.push_back("vsub." + OutTypeCode);
|
|
|
|
return;
|
|
|
|
} else if (Prefix == "vmla") {
|
|
|
|
Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
|
|
|
|
Result.push_back("vadd." + OutTypeCode);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have vcvt, get the input type from the instruction name
|
|
|
|
// (which should be of the form instname_inputtype) and append it
|
|
|
|
// before the output type.
|
|
|
|
if (Prefix == "vcvt") {
|
|
|
|
const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
|
|
|
|
Prefix += "." + inTypeCode;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Append output type code to get our final mangled instruction.
|
|
|
|
Prefix += "." + OutTypeCode;
|
|
|
|
|
|
|
|
Result.push_back(Prefix + " " + RegisterSuffix);
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
///
/// A macro is used when the prototype contains:
///  - 'i': an immediate argument, so that SemaChecking can range check the
///    immediate passed by the user;
///  - 'p' or 'c': a pointer argument, to avoid hiding aligned attributes
///    from the pointer type.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
|
|
|
|
|
|
|
|
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
|
|
|
|
|
|
|
|
// Generate the string "(argtype a, argtype b, ...)"
|
2013-10-11 10:34:30 +08:00
|
|
|
static std::string GenArgs(const std::string &proto, StringRef typestr,
|
|
|
|
const std::string &name) {
|
2011-10-06 21:03:08 +08:00
|
|
|
bool define = UseMacro(proto);
|
|
|
|
char arg = 'a';
|
|
|
|
|
|
|
|
std::string s;
|
|
|
|
s += "(";
|
|
|
|
|
|
|
|
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
|
|
|
|
if (define) {
|
|
|
|
// Some macro arguments are used directly instead of being assigned
|
|
|
|
// to local temporaries; prepend an underscore prefix to make their
|
|
|
|
// names consistent with the local temporaries.
|
|
|
|
if (MacroArgUsedDirectly(proto, i))
|
|
|
|
s += "__";
|
|
|
|
} else {
|
|
|
|
s += TypeString(proto[i], typestr) + " __";
|
|
|
|
}
|
|
|
|
s.push_back(arg);
|
2013-10-11 10:34:30 +08:00
|
|
|
//To avoid argument being multiple defined, add extra number for renaming.
|
2013-11-05 10:05:44 +08:00
|
|
|
if (name == "vcopy_lane" || name == "vcopy_laneq")
|
2013-10-11 10:34:30 +08:00
|
|
|
s.push_back('1');
|
2011-10-06 21:03:08 +08:00
|
|
|
if ((i + 1) < e)
|
|
|
|
s += ", ";
|
|
|
|
}
|
|
|
|
|
|
|
|
s += ")";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Macro arguments are not type-checked like inline function arguments, so
|
|
|
|
// assign them to local temporaries to get the right type checking.
|
2013-10-11 10:34:30 +08:00
|
|
|
static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
|
|
|
|
const std::string &name ) {
|
2011-10-06 21:03:08 +08:00
|
|
|
char arg = 'a';
|
|
|
|
std::string s;
|
|
|
|
bool generatedLocal = false;
|
|
|
|
|
|
|
|
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
|
|
|
|
// Do not create a temporary for an immediate argument.
|
|
|
|
// That would defeat the whole point of using a macro!
|
|
|
|
if (MacroArgUsedDirectly(proto, i))
|
|
|
|
continue;
|
|
|
|
generatedLocal = true;
|
2013-10-11 10:34:30 +08:00
|
|
|
bool extranumber = false;
|
2013-11-05 10:05:44 +08:00
|
|
|
if (name == "vcopy_lane" || name == "vcopy_laneq")
|
2013-10-11 10:34:30 +08:00
|
|
|
extranumber = true;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
s += TypeString(proto[i], typestr) + " __";
|
|
|
|
s.push_back(arg);
|
2013-10-11 10:34:30 +08:00
|
|
|
if(extranumber)
|
|
|
|
s.push_back('1');
|
2011-10-06 21:03:08 +08:00
|
|
|
s += " = (";
|
|
|
|
s.push_back(arg);
|
2013-10-11 10:34:30 +08:00
|
|
|
if(extranumber)
|
|
|
|
s.push_back('1');
|
2011-10-06 21:03:08 +08:00
|
|
|
s += "); ";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (generatedLocal)
|
|
|
|
s += "\\\n ";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use the vmovl builtin to sign-extend or zero-extend a vector.
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
|
|
|
|
std::string s, high;
|
|
|
|
high = h ? "_high" : "";
|
|
|
|
s = MangleName("vmovl" + high, typestr, ClassS);
|
|
|
|
s += "(" + a + ")";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the high 64-bit part of a vector
|
|
|
|
static std::string GetHigh(const std::string &a, StringRef typestr) {
|
2011-10-06 21:03:08 +08:00
|
|
|
std::string s;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
s = MangleName("vget_high", typestr, ClassS);
|
2011-10-06 21:03:08 +08:00
|
|
|
s += "(" + a + ")";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
// Gen operation with two operands and get high 64-bit for both of two operands.
|
|
|
|
static std::string Gen2OpWith2High(StringRef typestr,
|
|
|
|
const std::string &op,
|
|
|
|
const std::string &a,
|
|
|
|
const std::string &b) {
|
|
|
|
std::string s;
|
|
|
|
std::string Op1 = GetHigh(a, typestr);
|
|
|
|
std::string Op2 = GetHigh(b, typestr);
|
|
|
|
s = MangleName(op, typestr, ClassS);
|
|
|
|
s += "(" + Op1 + ", " + Op2 + ");";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Gen operation with three operands and get high 64-bit of the latter
|
|
|
|
// two operands.
|
|
|
|
static std::string Gen3OpWith2High(StringRef typestr,
|
|
|
|
const std::string &op,
|
|
|
|
const std::string &a,
|
|
|
|
const std::string &b,
|
|
|
|
const std::string &c) {
|
|
|
|
std::string s;
|
|
|
|
std::string Op1 = GetHigh(b, typestr);
|
|
|
|
std::string Op2 = GetHigh(c, typestr);
|
|
|
|
s = MangleName(op, typestr, ClassS);
|
|
|
|
s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
|
|
|
|
static std::string GenCombine(std::string typestr,
|
|
|
|
const std::string &a,
|
|
|
|
const std::string &b) {
|
|
|
|
std::string s;
|
|
|
|
s = MangleName("vcombine", typestr, ClassS);
|
|
|
|
s += "(" + a + ", " + b + ")";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Generate a compound literal of the TypeString('d', typestr) type whose
// initializer list repeats a nElts times, e.g. "(type){ a, a }".
static std::string Duplicate(unsigned nElts, StringRef typestr,
                             const std::string &a) {
  std::string literal = "(" + TypeString('d', typestr) + "){ ";

  for (unsigned i = 0; i != nElts; ++i) {
    if (i != 0)
      literal += ", ";
    literal += a;
  }

  literal += " }";
  return literal;
}
|
|
|
|
|
|
|
|
// Generate a __builtin_shufflevector call that takes vec as both inputs and
// repeats the index lane nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call = "__builtin_shufflevector(";
  call += vec;
  call += ", ";
  call += vec;

  for (unsigned remaining = nElts; remaining != 0; --remaining) {
    call += ", ";
    call += lane;
  }

  call += ")";
  return call;
}
|
|
|
|
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
static std::string RemoveHigh(const std::string &name) {
|
|
|
|
std::string s = name;
|
|
|
|
std::size_t found = s.find("_high_");
|
|
|
|
if (found == std::string::npos)
|
|
|
|
PrintFatalError("name should contain \"_high_\" for high intrinsics");
|
|
|
|
s.replace(found, 5, "");
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Return the number of vector elements for typestr. The per-type base count
// is selected from the type character returned by ClassifyType and is doubled
// when ClassifyType reports a quad type. quad is an output parameter: it is
// reset to false and then filled in by ClassifyType. Unknown type characters
// are a fatal error.
static unsigned GetNumElements(StringRef typestr, bool &quad) {
  quad = false;
  bool Unused = false;

  unsigned Count = 0;
  switch (ClassifyType(typestr, quad, Unused, Unused)) {
  case 'c':
    Count = 8;
    break;
  case 's':
  case 'h':
    Count = 4;
    break;
  case 'i':
  case 'f':
    Count = 2;
    break;
  case 'l':
  case 'd':
    Count = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }

  return quad ? Count << 1 : Count;
}
|
|
|
|
|
|
|
|
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
static std::string GenOpString(const std::string &name, OpKind op,
|
|
|
|
const std::string &proto, StringRef typestr) {
|
2011-10-06 21:03:08 +08:00
|
|
|
bool quad;
|
|
|
|
unsigned nElts = GetNumElements(typestr, quad);
|
|
|
|
bool define = UseMacro(proto);
|
|
|
|
|
|
|
|
std::string ts = TypeString(proto[0], typestr);
|
|
|
|
std::string s;
|
|
|
|
if (!define) {
|
|
|
|
s = "return ";
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(op) {
|
|
|
|
case OpAdd:
|
|
|
|
s += "__a + __b;";
|
|
|
|
break;
|
|
|
|
case OpAddl:
|
|
|
|
s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpAddlHi:
|
|
|
|
s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpAddw:
|
|
|
|
s += "__a + " + Extend(typestr, "__b") + ";";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpAddwHi:
|
|
|
|
s += "__a + " + Extend(typestr, "__b", 1) + ";";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpSub:
|
|
|
|
s += "__a - __b;";
|
|
|
|
break;
|
|
|
|
case OpSubl:
|
|
|
|
s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpSublHi:
|
|
|
|
s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpSubw:
|
|
|
|
s += "__a - " + Extend(typestr, "__b") + ";";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpSubwHi:
|
|
|
|
s += "__a - " + Extend(typestr, "__b", 1) + ";";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMulN:
|
|
|
|
s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
|
|
|
|
break;
|
|
|
|
case OpMulLane:
|
|
|
|
s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpMulXLane:
|
|
|
|
s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
|
|
|
|
SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMul:
|
|
|
|
s += "__a * __b;";
|
|
|
|
break;
|
|
|
|
case OpMullLane:
|
|
|
|
s += MangleName("vmull", typestr, ClassS) + "(__a, " +
|
|
|
|
SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpMullHiLane:
|
|
|
|
s += MangleName("vmull", typestr, ClassS) + "(" +
|
|
|
|
GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMlaN:
|
|
|
|
s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpMlaLane:
|
|
|
|
s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
|
|
|
case OpMla:
|
|
|
|
s += "__a + (__b * __c);";
|
|
|
|
break;
|
|
|
|
case OpMlalN:
|
|
|
|
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
|
|
|
|
Duplicate(nElts, typestr, "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpMlalLane:
|
|
|
|
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
|
|
|
|
SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpMlalHiLane:
|
|
|
|
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
|
|
|
|
GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMlal:
|
|
|
|
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpMullHi:
|
|
|
|
s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
|
|
|
|
break;
|
|
|
|
case OpMlalHi:
|
|
|
|
s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMlsN:
|
|
|
|
s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpMlsLane:
|
|
|
|
s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpFMSLane:
|
|
|
|
s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
|
|
|
|
s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
|
|
|
|
s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
|
|
|
|
s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
|
|
|
|
break;
|
|
|
|
case OpFMSLaneQ:
|
|
|
|
s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
|
|
|
|
s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
|
|
|
|
s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
|
|
|
|
s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMls:
|
|
|
|
s += "__a - (__b * __c);";
|
|
|
|
break;
|
|
|
|
case OpMlslN:
|
|
|
|
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
|
|
|
|
Duplicate(nElts, typestr, "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpMlslLane:
|
|
|
|
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
|
|
|
|
SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpMlslHiLane:
|
|
|
|
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
|
|
|
|
GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpMlsl:
|
|
|
|
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpMlslHi:
|
|
|
|
s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpQDMullLane:
|
|
|
|
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
|
|
|
|
SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpQDMullHiLane:
|
|
|
|
s += MangleName("vqdmull", typestr, ClassS) + "(" +
|
|
|
|
GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpQDMlalLane:
|
|
|
|
s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
|
|
|
|
SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpQDMlalHiLane:
|
|
|
|
s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
|
|
|
|
GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpQDMlslLane:
|
|
|
|
s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
|
|
|
|
SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2013-10-04 17:21:17 +08:00
|
|
|
case OpQDMlslHiLane:
|
|
|
|
s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
|
|
|
|
GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpQDMulhLane:
|
|
|
|
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
|
|
|
|
SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpQRDMulhLane:
|
|
|
|
s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
|
|
|
|
SplatLane(nElts, "__b", "__c") + ");";
|
|
|
|
break;
|
|
|
|
case OpEq:
|
|
|
|
s += "(" + ts + ")(__a == __b);";
|
|
|
|
break;
|
|
|
|
case OpGe:
|
|
|
|
s += "(" + ts + ")(__a >= __b);";
|
|
|
|
break;
|
|
|
|
case OpLe:
|
|
|
|
s += "(" + ts + ")(__a <= __b);";
|
|
|
|
break;
|
|
|
|
case OpGt:
|
|
|
|
s += "(" + ts + ")(__a > __b);";
|
|
|
|
break;
|
|
|
|
case OpLt:
|
|
|
|
s += "(" + ts + ")(__a < __b);";
|
|
|
|
break;
|
|
|
|
case OpNeg:
|
|
|
|
s += " -__a;";
|
|
|
|
break;
|
|
|
|
case OpNot:
|
|
|
|
s += " ~__a;";
|
|
|
|
break;
|
|
|
|
case OpAnd:
|
|
|
|
s += "__a & __b;";
|
|
|
|
break;
|
|
|
|
case OpOr:
|
|
|
|
s += "__a | __b;";
|
|
|
|
break;
|
|
|
|
case OpXor:
|
|
|
|
s += "__a ^ __b;";
|
|
|
|
break;
|
|
|
|
case OpAndNot:
|
|
|
|
s += "__a & ~__b;";
|
|
|
|
break;
|
|
|
|
case OpOrNot:
|
|
|
|
s += "__a | ~__b;";
|
|
|
|
break;
|
|
|
|
case OpCast:
|
|
|
|
s += "(" + ts + ")__a;";
|
|
|
|
break;
|
|
|
|
case OpConcat:
|
|
|
|
s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
|
|
|
|
s += ", (int64x1_t)__b, 0, 1);";
|
|
|
|
break;
|
|
|
|
case OpHi:
|
ARM: Improve codegen for vget_low_* and vget_high_ intrinsics.
These intrinsics use the __builtin_shuffle() function to extract the
low and high half, respectively, of a 128-bit NEON vector. Currently,
they're defined to use bitcasts to simplify the emitter, so we get code
like:
uint16x4_t vget_low_u32(uint16x8_t __a) {
return (uint32x2_t) __builtin_shufflevector((int64x2_t) __a,
(int64x2_t) __a,
0);
}
While this works, it results in those bitcasts going all the way through
to the IR, resulting in code like:
%1 = bitcast <8 x i16> %in to <2 x i64>
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32>
%zeroinitializer
%3 = bitcast <1 x i64> %2 to <4 x i16>
We can instead easily perform the operation directly on the input vector
like:
uint16x4_t vget_low_u16(uint16x8_t __a) {
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
Not only is that much easier to read on its own, it also results in
cleaner IR like:
%1 = shufflevector <8 x i16> %in, <8 x i16> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
This is both easier to read and easier for the back end to reason
about effectively since the operation is obfuscating the source with
bitcasts.
rdar://13894163
llvm-svn: 181865
2013-05-15 10:40:04 +08:00
|
|
|
// nElts is for the result vector, so the source is twice that number.
|
|
|
|
s += "__builtin_shufflevector(__a, __a";
|
|
|
|
for (unsigned i = nElts; i < nElts * 2; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s+= ");";
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case OpLo:
|
ARM: Improve codegen for vget_low_* and vget_high_ intrinsics.
These intrinsics use the __builtin_shuffle() function to extract the
low and high half, respectively, of a 128-bit NEON vector. Currently,
they're defined to use bitcasts to simplify the emitter, so we get code
like:
uint16x4_t vget_low_u32(uint16x8_t __a) {
return (uint32x2_t) __builtin_shufflevector((int64x2_t) __a,
(int64x2_t) __a,
0);
}
While this works, it results in those bitcasts going all the way through
to the IR, resulting in code like:
%1 = bitcast <8 x i16> %in to <2 x i64>
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32>
%zeroinitializer
%3 = bitcast <1 x i64> %2 to <4 x i16>
We can instead easily perform the operation directly on the input vector
like:
uint16x4_t vget_low_u16(uint16x8_t __a) {
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
Not only is that much easier to read on its own, it also results in
cleaner IR like:
%1 = shufflevector <8 x i16> %in, <8 x i16> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
This is both easier to read and easier for the back end to reason
about effectively since the operation is obfuscating the source with
bitcasts.
rdar://13894163
llvm-svn: 181865
2013-05-15 10:40:04 +08:00
|
|
|
s += "__builtin_shufflevector(__a, __a";
|
|
|
|
for (unsigned i = 0; i < nElts; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s+= ");";
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case OpDup:
|
|
|
|
s += Duplicate(nElts, typestr, "__a") + ";";
|
|
|
|
break;
|
|
|
|
case OpDupLane:
|
|
|
|
s += SplatLane(nElts, "__a", "__b") + ";";
|
|
|
|
break;
|
|
|
|
case OpSelect:
|
|
|
|
// ((0 & 1) | (~0 & 2))
|
|
|
|
s += "(" + ts + ")";
|
|
|
|
ts = TypeString(proto[1], typestr);
|
|
|
|
s += "((__a & (" + ts + ")__b) | ";
|
|
|
|
s += "(~__a & (" + ts + ")__c));";
|
|
|
|
break;
|
|
|
|
case OpRev16:
|
|
|
|
s += "__builtin_shufflevector(__a, __a";
|
|
|
|
for (unsigned i = 2; i <= nElts; i += 2)
|
|
|
|
for (unsigned j = 0; j != 2; ++j)
|
|
|
|
s += ", " + utostr(i - j - 1);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpRev32: {
|
|
|
|
unsigned WordElts = nElts >> (1 + (int)quad);
|
|
|
|
s += "__builtin_shufflevector(__a, __a";
|
|
|
|
for (unsigned i = WordElts; i <= nElts; i += WordElts)
|
|
|
|
for (unsigned j = 0; j != WordElts; ++j)
|
|
|
|
s += ", " + utostr(i - j - 1);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpRev64: {
|
|
|
|
unsigned DblWordElts = nElts >> (int)quad;
|
|
|
|
s += "__builtin_shufflevector(__a, __a";
|
|
|
|
for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
|
|
|
|
for (unsigned j = 0; j != DblWordElts; ++j)
|
|
|
|
s += ", " + utostr(i - j - 1);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
2013-11-14 10:45:18 +08:00
|
|
|
case OpXtnHi: {
|
|
|
|
s = TypeString(proto[1], typestr) + " __a1 = " +
|
|
|
|
MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
|
|
|
|
"return __builtin_shufflevector(__a, __a1";
|
|
|
|
for (unsigned i = 0; i < nElts * 4; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpSqxtunHi: {
|
|
|
|
s = TypeString(proto[1], typestr) + " __a1 = " +
|
|
|
|
MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
|
|
|
|
"return __builtin_shufflevector(__a, __a1";
|
|
|
|
for (unsigned i = 0; i < nElts * 4; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpQxtnHi: {
|
|
|
|
s = TypeString(proto[1], typestr) + " __a1 = " +
|
|
|
|
MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
|
|
|
|
"return __builtin_shufflevector(__a, __a1";
|
|
|
|
for (unsigned i = 0; i < nElts * 4; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpFcvtnHi: {
|
|
|
|
std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
|
|
|
|
s = TypeString(proto[1], typestr) + " __a1 = " +
|
|
|
|
MangleName(FName, typestr, ClassS) + "(__b);\n " +
|
|
|
|
"return __builtin_shufflevector(__a, __a1";
|
|
|
|
for (unsigned i = 0; i < nElts * 4; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpFcvtlHi: {
|
|
|
|
std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
|
|
|
|
s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
|
|
|
|
";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpFcvtxnHi: {
|
|
|
|
s = TypeString(proto[1], typestr) + " __a1 = " +
|
|
|
|
MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
|
|
|
|
"return __builtin_shufflevector(__a, __a1";
|
|
|
|
for (unsigned i = 0; i < nElts * 4; ++i)
|
|
|
|
s += ", " + utostr(i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
}
|
2013-11-06 11:35:53 +08:00
|
|
|
case OpUzp1:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = 0; i < nElts; i++)
|
|
|
|
s += ", " + utostr(2*i);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpUzp2:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = 0; i < nElts; i++)
|
|
|
|
s += ", " + utostr(2*i+1);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpZip1:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = 0; i < (nElts/2); i++)
|
|
|
|
s += ", " + utostr(i) + ", " + utostr(i+nElts);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpZip2:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = nElts/2; i < nElts; i++)
|
|
|
|
s += ", " + utostr(i) + ", " + utostr(i+nElts);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpTrn1:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = 0; i < (nElts/2); i++)
|
|
|
|
s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
|
|
|
|
s += ");";
|
|
|
|
break;
|
|
|
|
case OpTrn2:
|
|
|
|
s += "__builtin_shufflevector(__a, __b";
|
|
|
|
for (unsigned i = 0; i < (nElts/2); i++)
|
|
|
|
s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
|
|
|
|
s += ");";
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpAbdl: {
|
|
|
|
std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
|
|
|
|
if (typestr[0] != 'U') {
|
|
|
|
// vabd results are always unsigned and must be zero-extended.
|
|
|
|
std::string utype = "U" + typestr.str();
|
|
|
|
s += "(" + TypeString(proto[0], typestr) + ")";
|
|
|
|
abd = "(" + TypeString('d', utype) + ")" + abd;
|
|
|
|
s += Extend(utype, abd) + ";";
|
|
|
|
} else {
|
|
|
|
s += Extend(typestr, abd) + ";";
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpAbdlHi:
|
|
|
|
s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
|
|
|
|
break;
|
|
|
|
case OpAddhnHi: {
|
|
|
|
std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
|
|
|
|
s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
|
|
|
|
s += ";";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpRAddhnHi: {
|
|
|
|
std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
|
|
|
|
s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
|
|
|
|
s += ";";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpSubhnHi: {
|
|
|
|
std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
|
|
|
|
s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
|
|
|
|
s += ";";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpRSubhnHi: {
|
|
|
|
std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
|
|
|
|
s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
|
|
|
|
s += ";";
|
|
|
|
break;
|
|
|
|
}
|
2011-10-06 21:03:08 +08:00
|
|
|
case OpAba:
|
|
|
|
s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
|
|
|
|
break;
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
case OpAbal:
|
|
|
|
s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
|
|
|
|
break;
|
|
|
|
case OpAbalHi:
|
|
|
|
s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
|
|
|
|
break;
|
|
|
|
case OpQDMullHi:
|
|
|
|
s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
|
|
|
|
break;
|
|
|
|
case OpQDMlalHi:
|
|
|
|
s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
|
|
|
|
break;
|
|
|
|
case OpQDMlslHi:
|
|
|
|
s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
2013-08-01 17:23:19 +08:00
|
|
|
case OpDiv:
|
|
|
|
s += "__a / __b;";
|
|
|
|
break;
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
case OpMovlHi: {
|
|
|
|
s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
|
|
|
|
MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
|
|
|
|
s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
|
|
|
|
s += "(__a1, 0);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpLongHi: {
|
|
|
|
// Another local variable __a1 is needed for calling a Macro,
|
|
|
|
// or using __a will have naming conflict when Macro expanding.
|
|
|
|
s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
|
|
|
|
MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
|
|
|
|
s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
|
|
|
|
"(__a1, __b);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpNarrowHi: {
|
|
|
|
s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
|
|
|
|
MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
|
|
|
|
break;
|
|
|
|
}
|
2013-11-05 10:05:44 +08:00
|
|
|
case OpCopyLane: {
|
2013-10-11 10:34:30 +08:00
|
|
|
s += TypeString('s', typestr) + " __c2 = " +
|
|
|
|
MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
|
|
|
|
MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
|
|
|
|
break;
|
|
|
|
}
|
2013-11-05 10:05:44 +08:00
|
|
|
case OpCopyQLane: {
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
|
|
|
|
"(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpCopyLaneQ: {
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
|
|
|
|
"(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
|
|
|
|
break;
|
|
|
|
}
|
2013-11-16 07:33:31 +08:00
|
|
|
case OpScalarMulLane: {
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
|
|
|
|
"(__b, __c);\\\n __a * __d1;";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpScalarMulLaneQ: {
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
|
|
|
|
"(__b, __c);\\\n __a * __d1;";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpScalarMulXLane: {
|
|
|
|
bool dummy = false;
|
|
|
|
char type = ClassifyType(typestr, dummy, dummy, dummy);
|
|
|
|
if (type == 'f') type = 's';
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
|
|
|
|
"(__b, __c);\\\n vmulx" + type + "_" +
|
|
|
|
typeCode + "(__a, __d1);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OpScalarMulXLaneQ: {
|
|
|
|
bool dummy = false;
|
|
|
|
char type = ClassifyType(typestr, dummy, dummy, dummy);
|
|
|
|
if (type == 'f') type = 's';
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
|
|
|
|
typeCode + "(__b, __c);\\\n vmulx" + type +
|
|
|
|
"_" + typeCode + "(__a, __d1);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case OpScalarVMulXLane: {
|
|
|
|
bool dummy = false;
|
|
|
|
char type = ClassifyType(typestr, dummy, dummy, dummy);
|
|
|
|
if (type == 'f') type = 's';
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vget_lane_" +
|
|
|
|
typeCode + "(__a, 0);\\\n" +
|
|
|
|
" " + TypeString('s', typestr) + " __e1 = vget_lane_" +
|
|
|
|
typeCode + "(__b, __c);\\\n" +
|
|
|
|
" " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
|
|
|
|
typeCode + "(__d1, __e1);\\\n" +
|
|
|
|
" " + TypeString('d', typestr) + " __g1;\\\n" +
|
|
|
|
" vset_lane_" + typeCode + "(__f1, __g1, __c);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case OpScalarVMulXLaneQ: {
|
|
|
|
bool dummy = false;
|
|
|
|
char type = ClassifyType(typestr, dummy, dummy, dummy);
|
|
|
|
if (type == 'f') type = 's';
|
|
|
|
std::string typeCode = "";
|
|
|
|
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
|
|
|
s += TypeString('s', typestr) + " __d1 = vget_lane_" +
|
|
|
|
typeCode + "(__a, 0);\\\n" +
|
|
|
|
" " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
|
|
|
|
typeCode + "(__b, __c);\\\n" +
|
|
|
|
" " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
|
|
|
|
typeCode + "(__d1, __e1);\\\n" +
|
|
|
|
" " + TypeString('d', typestr) + " __g1;\\\n" +
|
|
|
|
" vset_lane_" + typeCode + "(__f1, __g1, 0);";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError("unknown OpKind!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
|
|
|
|
unsigned mod = proto[0];
|
|
|
|
|
|
|
|
if (mod == 'v' || mod == 'f')
|
|
|
|
mod = proto[1];
|
|
|
|
|
|
|
|
bool quad = false;
|
|
|
|
bool poly = false;
|
|
|
|
bool usgn = false;
|
|
|
|
bool scal = false;
|
|
|
|
bool cnst = false;
|
|
|
|
bool pntr = false;
|
|
|
|
|
|
|
|
// Base type to get the type string for.
|
|
|
|
char type = ClassifyType(typestr, quad, poly, usgn);
|
|
|
|
|
|
|
|
// Based on the modifying character, change the type and width if necessary.
|
|
|
|
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
|
|
|
|
|
2011-11-08 09:16:11 +08:00
|
|
|
NeonTypeFlags::EltType ET;
|
2011-10-06 21:03:08 +08:00
|
|
|
switch (type) {
|
|
|
|
case 'c':
|
2011-11-08 09:16:11 +08:00
|
|
|
ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case 's':
|
2011-11-08 09:16:11 +08:00
|
|
|
ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case 'i':
|
2011-11-08 09:16:11 +08:00
|
|
|
ET = NeonTypeFlags::Int32;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case 'l':
|
2013-11-14 11:29:16 +08:00
|
|
|
ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case 'h':
|
2011-11-08 09:16:11 +08:00
|
|
|
ET = NeonTypeFlags::Float16;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
|
|
|
case 'f':
|
2011-11-08 09:16:11 +08:00
|
|
|
ET = NeonTypeFlags::Float32;
|
2011-10-06 21:03:08 +08:00
|
|
|
break;
|
2013-08-01 17:23:19 +08:00
|
|
|
case 'd':
|
|
|
|
ET = NeonTypeFlags::Float64;
|
|
|
|
break;
|
2011-10-06 21:03:08 +08:00
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError("unhandled type!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
2011-11-08 09:16:11 +08:00
|
|
|
NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
|
|
|
|
return Flags.getFlags();
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
|
2013-10-05 16:22:55 +08:00
|
|
|
// Return true when the prototype string contains an 's' or an 'r' modifier
// character anywhere (return slot included). Callers treat a true result as
// "this prototype has a scalar operand".
//
// The prototype is taken by const reference so that a call does not copy the
// string; existing call sites are unaffected.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("sr") != std::string::npos;
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
|
|
|
|
static std::string GenBuiltin(const std::string &name, const std::string &proto,
|
|
|
|
StringRef typestr, ClassKind ck) {
|
|
|
|
std::string s;
|
|
|
|
|
|
|
|
// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
|
|
|
|
// sret-like argument.
|
2013-11-14 09:57:55 +08:00
|
|
|
bool sret = IsMultiVecProto(proto[0]);
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
bool define = UseMacro(proto);
|
|
|
|
|
|
|
|
// Check if the prototype has a scalar operand with the type of the vector
|
|
|
|
// elements. If not, bitcasting the args will take care of arg checking.
|
|
|
|
// The actual signedness etc. will be taken care of with special enums.
|
2013-10-05 16:22:55 +08:00
|
|
|
if (!ProtoHasScalar(proto))
|
2011-10-06 21:03:08 +08:00
|
|
|
ck = ClassB;
|
|
|
|
|
|
|
|
if (proto[0] != 'v') {
|
|
|
|
std::string ts = TypeString(proto[0], typestr);
|
|
|
|
|
|
|
|
if (define) {
|
|
|
|
if (sret)
|
|
|
|
s += ts + " r; ";
|
|
|
|
else
|
|
|
|
s += "(" + ts + ")";
|
|
|
|
} else if (sret) {
|
|
|
|
s += ts + " r; ";
|
|
|
|
} else {
|
|
|
|
s += "return (" + ts + ")";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool splat = proto.find('a') != std::string::npos;
|
|
|
|
|
|
|
|
s += "__builtin_neon_";
|
|
|
|
if (splat) {
|
|
|
|
// Call the non-splat builtin: chop off the "_n" suffix from the name.
|
|
|
|
std::string vname(name, 0, name.size()-2);
|
|
|
|
s += MangleName(vname, typestr, ck);
|
|
|
|
} else {
|
|
|
|
s += MangleName(name, typestr, ck);
|
|
|
|
}
|
|
|
|
s += "(";
|
|
|
|
|
|
|
|
// Pass the address of the return variable as the first argument to sret-like
|
|
|
|
// builtins.
|
|
|
|
if (sret)
|
|
|
|
s += "&r, ";
|
|
|
|
|
|
|
|
char arg = 'a';
|
|
|
|
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
|
|
|
|
std::string args = std::string(&arg, 1);
|
|
|
|
|
|
|
|
// Use the local temporaries instead of the macro arguments.
|
|
|
|
args = "__" + args;
|
|
|
|
|
|
|
|
bool argQuad = false;
|
|
|
|
bool argPoly = false;
|
|
|
|
bool argUsgn = false;
|
|
|
|
bool argScalar = false;
|
|
|
|
bool dummy = false;
|
|
|
|
char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
|
|
|
|
argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
|
|
|
|
dummy, dummy);
|
|
|
|
|
|
|
|
// Handle multiple-vector values specially, emitting each subvector as an
|
|
|
|
// argument to the __builtin.
|
2013-11-14 09:57:55 +08:00
|
|
|
unsigned NumOfVec = 0;
|
2011-10-06 21:03:08 +08:00
|
|
|
if (proto[i] >= '2' && proto[i] <= '4') {
|
2013-11-14 09:57:55 +08:00
|
|
|
NumOfVec = proto[i] - '0';
|
|
|
|
} else if (proto[i] >= 'B' && proto[i] <= 'D') {
|
|
|
|
NumOfVec = proto[i] - 'A' + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NumOfVec > 0) {
|
2011-10-06 21:03:08 +08:00
|
|
|
// Check if an explicit cast is needed.
|
|
|
|
if (argType != 'c' || argPoly || argUsgn)
|
|
|
|
args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
|
|
|
|
|
2013-11-14 09:57:55 +08:00
|
|
|
for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
|
2011-10-06 21:03:08 +08:00
|
|
|
s += args + ".val[" + utostr(vi) + "]";
|
|
|
|
if ((vi + 1) < ve)
|
|
|
|
s += ", ";
|
|
|
|
}
|
|
|
|
if ((i + 1) < e)
|
|
|
|
s += ", ";
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (splat && (i + 1) == e)
|
|
|
|
args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
|
|
|
|
|
|
|
|
// Check if an explicit cast is needed.
|
|
|
|
if ((splat || !argScalar) &&
|
|
|
|
((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
|
|
|
|
std::string argTypeStr = "c";
|
|
|
|
if (ck != ClassB)
|
|
|
|
argTypeStr = argType;
|
|
|
|
if (argQuad)
|
|
|
|
argTypeStr = "Q" + argTypeStr;
|
|
|
|
args = "(" + TypeString('d', argTypeStr) + ")" + args;
|
|
|
|
}
|
|
|
|
|
|
|
|
s += args;
|
|
|
|
if ((i + 1) < e)
|
|
|
|
s += ", ";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Extra constant integer to hold type class enum for this function, e.g. s8
|
|
|
|
if (ck == ClassB)
|
|
|
|
s += ", " + utostr(GetNeonEnum(proto, typestr));
|
|
|
|
|
|
|
|
s += ");";
|
|
|
|
|
|
|
|
if (proto[0] != 'v' && sret) {
|
|
|
|
if (define)
|
|
|
|
s += " r;";
|
|
|
|
else
|
|
|
|
s += " return r;";
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::string GenBuiltinDef(const std::string &name,
|
|
|
|
const std::string &proto,
|
|
|
|
StringRef typestr, ClassKind ck) {
|
|
|
|
std::string s("BUILTIN(__builtin_neon_");
|
|
|
|
|
|
|
|
// If all types are the same size, bitcasting the args will take care
|
|
|
|
// of arg checking. The actual signedness etc. will be taken care of with
|
|
|
|
// special enums.
|
2013-10-05 16:22:55 +08:00
|
|
|
if (!ProtoHasScalar(proto))
|
2011-10-06 21:03:08 +08:00
|
|
|
ck = ClassB;
|
|
|
|
|
|
|
|
s += MangleName(name, typestr, ck);
|
|
|
|
s += ", \"";
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = proto.size(); i != e; ++i)
|
|
|
|
s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
|
|
|
|
|
|
|
|
// Extra constant integer to hold type class enum for this function, e.g. s8
|
|
|
|
if (ck == ClassB)
|
|
|
|
s += "i";
|
|
|
|
|
|
|
|
s += "\", \"n\")";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::string GenIntrinsic(const std::string &name,
|
|
|
|
const std::string &proto,
|
|
|
|
StringRef outTypeStr, StringRef inTypeStr,
|
|
|
|
OpKind kind, ClassKind classKind) {
|
|
|
|
assert(!proto.empty() && "");
|
2012-05-10 02:17:30 +08:00
|
|
|
bool define = UseMacro(proto) && kind != OpUnavailable;
|
2011-10-06 21:03:08 +08:00
|
|
|
std::string s;
|
|
|
|
|
|
|
|
// static always inline + return type
|
|
|
|
if (define)
|
|
|
|
s += "#define ";
|
|
|
|
else
|
|
|
|
s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
|
|
|
|
|
|
|
|
// Function name with type suffix
|
|
|
|
std::string mangledName = MangleName(name, outTypeStr, ClassS);
|
|
|
|
if (outTypeStr != inTypeStr) {
|
|
|
|
// If the input type is different (e.g., for vreinterpret), append a suffix
|
|
|
|
// for the input type. String off a "Q" (quad) prefix so that MangleName
|
|
|
|
// does not insert another "q" in the name.
|
|
|
|
unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
|
|
|
|
StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
|
|
|
|
mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
|
|
|
|
}
|
|
|
|
s += mangledName;
|
|
|
|
|
|
|
|
// Function arguments
|
2013-10-11 10:34:30 +08:00
|
|
|
s += GenArgs(proto, inTypeStr, name);
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Definition.
|
|
|
|
if (define) {
|
|
|
|
s += " __extension__ ({ \\\n ";
|
2013-10-11 10:34:30 +08:00
|
|
|
s += GenMacroLocals(proto, inTypeStr, name);
|
2012-05-10 02:17:30 +08:00
|
|
|
} else if (kind == OpUnavailable) {
|
|
|
|
s += " __attribute__((unavailable));\n";
|
|
|
|
return s;
|
|
|
|
} else
|
2012-08-04 01:30:46 +08:00
|
|
|
s += " {\n ";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
if (kind != OpNone)
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
s += GenOpString(name, kind, proto, outTypeStr);
|
2011-10-06 21:03:08 +08:00
|
|
|
else
|
|
|
|
s += GenBuiltin(name, proto, outTypeStr, classKind);
|
|
|
|
if (define)
|
|
|
|
s += " })";
|
|
|
|
else
|
|
|
|
s += " }";
|
|
|
|
s += "\n";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
|
|
|
|
/// is comprised of type definitions and function declarations.
|
|
|
|
void NeonEmitter::run(raw_ostream &OS) {
|
|
|
|
OS <<
|
|
|
|
"/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
|
|
|
|
"---===\n"
|
|
|
|
" *\n"
|
|
|
|
" * Permission is hereby granted, free of charge, to any person obtaining "
|
|
|
|
"a copy\n"
|
|
|
|
" * of this software and associated documentation files (the \"Software\"),"
|
|
|
|
" to deal\n"
|
|
|
|
" * in the Software without restriction, including without limitation the "
|
|
|
|
"rights\n"
|
|
|
|
" * to use, copy, modify, merge, publish, distribute, sublicense, "
|
|
|
|
"and/or sell\n"
|
|
|
|
" * copies of the Software, and to permit persons to whom the Software is\n"
|
|
|
|
" * furnished to do so, subject to the following conditions:\n"
|
|
|
|
" *\n"
|
|
|
|
" * The above copyright notice and this permission notice shall be "
|
|
|
|
"included in\n"
|
|
|
|
" * all copies or substantial portions of the Software.\n"
|
|
|
|
" *\n"
|
|
|
|
" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
|
|
|
|
"EXPRESS OR\n"
|
|
|
|
" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
|
|
|
|
"MERCHANTABILITY,\n"
|
|
|
|
" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
|
|
|
|
"SHALL THE\n"
|
|
|
|
" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
|
|
|
|
"OTHER\n"
|
|
|
|
" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
|
|
|
|
"ARISING FROM,\n"
|
|
|
|
" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
|
|
|
|
"DEALINGS IN\n"
|
|
|
|
" * THE SOFTWARE.\n"
|
|
|
|
" *\n"
|
|
|
|
" *===--------------------------------------------------------------------"
|
|
|
|
"---===\n"
|
|
|
|
" */\n\n";
|
|
|
|
|
|
|
|
OS << "#ifndef __ARM_NEON_H\n";
|
|
|
|
OS << "#define __ARM_NEON_H\n\n";
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
OS << "#error \"NEON support not enabled\"\n";
|
|
|
|
OS << "#endif\n\n";
|
|
|
|
|
|
|
|
OS << "#include <stdint.h>\n\n";
|
|
|
|
|
|
|
|
// Emit NEON-specific scalar typedefs.
|
|
|
|
OS << "typedef float float32_t;\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "typedef __fp16 float16_t;\n";
|
|
|
|
|
|
|
|
OS << "#ifdef __aarch64__\n";
|
|
|
|
OS << "typedef double float64_t;\n";
|
|
|
|
OS << "#endif\n\n";
|
|
|
|
|
|
|
|
// For now, signedness of polynomial types depends on target
|
|
|
|
OS << "#ifdef __aarch64__\n";
|
|
|
|
OS << "typedef uint8_t poly8_t;\n";
|
|
|
|
OS << "typedef uint16_t poly16_t;\n";
|
2013-11-14 11:29:16 +08:00
|
|
|
OS << "typedef uint64_t poly64_t;\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "#else\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
OS << "typedef int8_t poly8_t;\n";
|
|
|
|
OS << "typedef int16_t poly16_t;\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "#endif\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Emit Neon vector typedefs.
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string TypedefTypes(
|
2013-11-14 11:29:16 +08:00
|
|
|
"cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
|
2011-10-06 21:03:08 +08:00
|
|
|
SmallVector<StringRef, 24> TDTypeVec;
|
|
|
|
ParseTypes(0, TypedefTypes, TDTypeVec);
|
|
|
|
|
|
|
|
// Emit vector typedefs.
|
2013-10-04 17:21:17 +08:00
|
|
|
bool isA64 = false;
|
2013-11-14 11:29:16 +08:00
|
|
|
bool preinsert;
|
|
|
|
bool postinsert;
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
|
|
|
|
bool dummy, quad = false, poly = false;
|
2013-08-01 17:23:19 +08:00
|
|
|
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
|
2013-11-14 11:29:16 +08:00
|
|
|
preinsert = false;
|
|
|
|
postinsert = false;
|
2013-08-01 17:23:19 +08:00
|
|
|
|
2013-11-14 11:29:16 +08:00
|
|
|
if (type == 'd' || (type == 'l' && poly)) {
|
2013-10-04 17:21:17 +08:00
|
|
|
preinsert = isA64? false: true;
|
2013-08-01 17:23:19 +08:00
|
|
|
isA64 = true;
|
2013-10-04 17:21:17 +08:00
|
|
|
} else {
|
|
|
|
postinsert = isA64? true: false;
|
|
|
|
isA64 = false;
|
|
|
|
}
|
|
|
|
if (postinsert)
|
|
|
|
OS << "#endif\n";
|
|
|
|
if (preinsert)
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "#ifdef __aarch64__\n";
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
if (poly)
|
|
|
|
OS << "typedef __attribute__((neon_polyvector_type(";
|
|
|
|
else
|
|
|
|
OS << "typedef __attribute__((neon_vector_type(";
|
|
|
|
|
|
|
|
unsigned nElts = GetNumElements(TDTypeVec[i], quad);
|
|
|
|
OS << utostr(nElts) << "))) ";
|
|
|
|
if (nElts < 10)
|
|
|
|
OS << " ";
|
|
|
|
|
|
|
|
OS << TypeString('s', TDTypeVec[i]);
|
|
|
|
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
2013-11-14 11:29:16 +08:00
|
|
|
postinsert = isA64? true: false;
|
|
|
|
if (postinsert)
|
|
|
|
OS << "#endif\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
OS << "\n";
|
|
|
|
|
|
|
|
// Emit struct typedefs.
|
2013-10-04 17:21:17 +08:00
|
|
|
isA64 = false;
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned vi = 2; vi != 5; ++vi) {
|
|
|
|
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
|
2013-08-01 17:23:19 +08:00
|
|
|
bool dummy, quad = false, poly = false;
|
|
|
|
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
|
2013-11-14 11:29:16 +08:00
|
|
|
preinsert = false;
|
|
|
|
postinsert = false;
|
2013-08-01 17:23:19 +08:00
|
|
|
|
2013-11-14 11:29:16 +08:00
|
|
|
if (type == 'd' || (type == 'l' && poly)) {
|
2013-10-04 17:21:17 +08:00
|
|
|
preinsert = isA64? false: true;
|
2013-08-01 17:23:19 +08:00
|
|
|
isA64 = true;
|
2013-10-04 17:21:17 +08:00
|
|
|
} else {
|
|
|
|
postinsert = isA64? true: false;
|
|
|
|
isA64 = false;
|
|
|
|
}
|
|
|
|
if (postinsert)
|
|
|
|
OS << "#endif\n";
|
|
|
|
if (preinsert)
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << "#ifdef __aarch64__\n";
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
std::string ts = TypeString('d', TDTypeVec[i]);
|
|
|
|
std::string vs = TypeString('0' + vi, TDTypeVec[i]);
|
|
|
|
OS << "typedef struct " << vs << " {\n";
|
|
|
|
OS << " " << ts << " val";
|
|
|
|
OS << "[" << utostr(vi) << "]";
|
|
|
|
OS << ";\n} ";
|
2013-08-01 17:23:19 +08:00
|
|
|
OS << vs << ";\n";
|
|
|
|
OS << "\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
2013-11-14 11:29:16 +08:00
|
|
|
postinsert = isA64? true: false;
|
|
|
|
if (postinsert)
|
|
|
|
OS << "#endif\n";
|
|
|
|
OS << "\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-04-13 04:17:20 +08:00
|
|
|
OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
StringMap<ClassKind> EmittedMap;
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Emit vmovl, vmull and vabd intrinsics first so they can be used by other
|
|
|
|
// intrinsics. (Some of the saturating multiply instructions are also
|
|
|
|
// used to implement the corresponding "_lane" variants, but tablegen
|
|
|
|
// sorts the records into alphabetical order so that the "_lane" variants
|
|
|
|
// come after the intrinsics they use.)
|
2013-08-01 17:23:19 +08:00
|
|
|
emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
|
|
|
|
emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
|
|
|
|
emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
|
2013-08-01 17:23:19 +08:00
|
|
|
|
|
|
|
// ARM intrinsics must be emitted before AArch64 intrinsics to ensure
|
|
|
|
// common intrinsics appear only once in the output stream.
|
|
|
|
// The check for uniquiness is done in emitIntrinsic.
|
|
|
|
// Emit ARM intrinsics.
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
2013-08-01 17:23:19 +08:00
|
|
|
|
|
|
|
// Skip AArch64 intrinsics; they will be emitted at the end.
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (isA64)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
|
2011-10-06 21:03:08 +08:00
|
|
|
R->getName() != "VABD")
|
2013-08-01 17:23:19 +08:00
|
|
|
emitIntrinsic(OS, R, EmittedMap);
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Emit AArch64-specific intrinsics.
|
|
|
|
OS << "#ifdef __aarch64__\n";
|
|
|
|
|
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
llvm-svn: 190289
2013-09-09 10:21:08 +08:00
|
|
|
emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
|
|
|
|
emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
|
|
|
|
emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
|
|
|
|
|
|
|
// Skip ARM intrinsics already included above.
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (!isA64)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
emitIntrinsic(OS, R, EmittedMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
OS << "#endif\n\n";
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
OS << "#undef __ai\n\n";
|
|
|
|
OS << "#endif /* __ARM_NEON_H */\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
|
2013-08-01 17:23:19 +08:00
|
|
|
/// intrinsics specified by record R checking for intrinsic uniqueness.
|
|
|
|
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
|
|
|
|
StringMap<ClassKind> &EmittedMap) {
|
2011-10-06 21:03:08 +08:00
|
|
|
std::string name = R->getValueAsString("Name");
|
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
|
|
|
std::string Types = R->getValueAsString("Types");
|
|
|
|
|
|
|
|
SmallVector<StringRef, 16> TypeVec;
|
|
|
|
ParseTypes(R, Types, TypeVec);
|
|
|
|
|
|
|
|
OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
|
|
|
|
|
|
|
|
ClassKind classKind = ClassNone;
|
|
|
|
if (R->getSuperClasses().size() >= 2)
|
|
|
|
classKind = ClassMap[R->getSuperClasses()[1]];
|
|
|
|
if (classKind == ClassNone && kind == OpNone)
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError(R->getLoc(), "Builtin has no class kind");
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
|
|
|
|
if (kind == OpReinterpret) {
|
|
|
|
bool outQuad = false;
|
|
|
|
bool dummy = false;
|
|
|
|
(void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
|
|
|
|
for (unsigned srcti = 0, srcte = TypeVec.size();
|
|
|
|
srcti != srcte; ++srcti) {
|
|
|
|
bool inQuad = false;
|
|
|
|
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
|
|
|
|
if (srcti == ti || inQuad != outQuad)
|
|
|
|
continue;
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
|
|
|
|
OpCast, ClassS);
|
|
|
|
if (EmittedMap.count(s))
|
|
|
|
continue;
|
|
|
|
EmittedMap[s] = ClassS;
|
|
|
|
OS << s;
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
} else {
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string s =
|
|
|
|
GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
|
|
|
|
if (EmittedMap.count(s))
|
|
|
|
continue;
|
|
|
|
EmittedMap[s] = classKind;
|
|
|
|
OS << s;
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
OS << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned RangeFromType(const char mod, StringRef typestr) {
|
|
|
|
// base type to get the type string for.
|
|
|
|
bool quad = false, dummy = false;
|
|
|
|
char type = ClassifyType(typestr, quad, dummy, dummy);
|
|
|
|
type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case 'c':
|
|
|
|
return (8 << (int)quad) - 1;
|
|
|
|
case 'h':
|
|
|
|
case 's':
|
|
|
|
return (4 << (int)quad) - 1;
|
|
|
|
case 'f':
|
|
|
|
case 'i':
|
|
|
|
return (2 << (int)quad) - 1;
|
2013-10-04 17:21:17 +08:00
|
|
|
case 'd':
|
2011-10-06 21:03:08 +08:00
|
|
|
case 'l':
|
|
|
|
return (1 << (int)quad) - 1;
|
|
|
|
default:
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError("unhandled type!");
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-01 03:29:05 +08:00
|
|
|
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
|
|
|
|
// base type to get the type string for.
|
|
|
|
bool dummy = false;
|
|
|
|
char type = ClassifyType(typestr, dummy, dummy, dummy);
|
|
|
|
type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case 'c':
|
|
|
|
return 7;
|
|
|
|
case 'h':
|
|
|
|
case 's':
|
|
|
|
return 15;
|
|
|
|
case 'f':
|
|
|
|
case 'i':
|
|
|
|
return 31;
|
|
|
|
case 'd':
|
|
|
|
case 'l':
|
|
|
|
return 63;
|
|
|
|
default:
|
|
|
|
PrintFatalError("unhandled type!");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
/// Generate the ARM and AArch64 intrinsic range checking code for
|
|
|
|
/// shift/lane immediates, checking for unique declarations.
|
|
|
|
void
|
|
|
|
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
|
|
|
|
StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64RangeCheck) {
|
|
|
|
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
|
2011-10-06 21:03:08 +08:00
|
|
|
StringMap<OpKind> EmittedMap;
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Generate the intrinsic range checking code for shift/lane immediates.
|
|
|
|
if (isA64RangeCheck)
|
|
|
|
OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
|
|
|
|
else
|
|
|
|
OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
2013-08-01 17:23:19 +08:00
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
|
|
|
|
if (k != OpNone)
|
|
|
|
continue;
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string name = R->getValueAsString("Name");
|
2011-10-06 21:03:08 +08:00
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string Types = R->getValueAsString("Types");
|
2013-08-29 15:55:15 +08:00
|
|
|
std::string Rename = name + "@" + Proto;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Functions with 'a' (the splat code) in the type prototype should not get
|
|
|
|
// their own builtin as they use the non-splat variant.
|
|
|
|
if (Proto.find('a') != std::string::npos)
|
|
|
|
continue;
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Functions which do not have an immediate do not need to have range
|
|
|
|
// checking code emitted.
|
|
|
|
size_t immPos = Proto.find('i');
|
|
|
|
if (immPos == std::string::npos)
|
|
|
|
continue;
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
SmallVector<StringRef, 16> TypeVec;
|
|
|
|
ParseTypes(R, Types, TypeVec);
|
|
|
|
|
|
|
|
if (R->getSuperClasses().size() < 2)
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError(R->getLoc(), "Builtin has no class kind");
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Do not include AArch64 range checks if not generating code for AArch64.
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (!isA64RangeCheck && isA64)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Include ARM range checks in AArch64 but only if ARM intrinsics are not
|
|
|
|
// redefined by AArch64 to handle new types.
|
2013-08-29 15:55:15 +08:00
|
|
|
if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
|
|
|
|
ClassKind &A64CK = A64IntrinsicMap[Rename];
|
2013-08-01 17:23:19 +08:00
|
|
|
if (A64CK == ck && ck != ClassNone)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string namestr, shiftstr, rangestr;
|
|
|
|
|
|
|
|
if (R->getValueAsBit("isVCVT_N")) {
|
|
|
|
// VCVT between floating- and fixed-point values takes an immediate
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
// in the range [1, 32] for f32, or [1, 64] for f64.
|
2013-08-01 17:23:19 +08:00
|
|
|
ck = ClassB;
|
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshr$
and 4 convert instructions:
scvtf,ucvtf,fcvtzs,fcvtzu
llvm-svn: 189926
2013-09-04 17:29:13 +08:00
|
|
|
if (name.find("32") != std::string::npos)
|
|
|
|
rangestr = "l = 1; u = 31"; // upper bound = l + u
|
|
|
|
else if (name.find("64") != std::string::npos)
|
|
|
|
rangestr = "l = 1; u = 63";
|
|
|
|
else
|
|
|
|
PrintFatalError(R->getLoc(),
|
|
|
|
"Fixed point convert name should contains \"32\" or \"64\"");
|
2013-11-01 03:29:05 +08:00
|
|
|
|
|
|
|
} else if (R->getValueAsBit("isScalarShift")) {
|
2013-11-12 02:04:22 +08:00
|
|
|
// Right shifts have an 'r' in the name, left shifts do not. Convert
|
|
|
|
// instructions have the same bounds and right shifts.
|
|
|
|
if (name.find('r') != std::string::npos ||
|
|
|
|
name.find("cvt") != std::string::npos)
|
2013-11-01 03:29:05 +08:00
|
|
|
rangestr = "l = 1; ";
|
|
|
|
|
|
|
|
rangestr += "u = " +
|
|
|
|
utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
|
2013-10-05 16:22:55 +08:00
|
|
|
} else if (!ProtoHasScalar(Proto)) {
|
2013-08-01 17:23:19 +08:00
|
|
|
// Builtins which are overloaded by type will need to have their upper
|
|
|
|
// bound computed at Sema time based on the type constant.
|
|
|
|
ck = ClassB;
|
|
|
|
if (R->getValueAsBit("isShift")) {
|
|
|
|
shiftstr = ", true";
|
|
|
|
|
|
|
|
// Right shifts have an 'r' in the name, left shifts do not.
|
|
|
|
if (name.find('r') != std::string::npos)
|
|
|
|
rangestr = "l = 1; ";
|
|
|
|
}
|
|
|
|
rangestr += "u = RFT(TV" + shiftstr + ")";
|
|
|
|
} else {
|
|
|
|
// The immediate generally refers to a lane in the preceding argument.
|
|
|
|
assert(immPos > 0 && "unexpected immediate operand");
|
|
|
|
rangestr =
|
|
|
|
"u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
|
|
|
|
}
|
|
|
|
// Make sure cases appear only once by uniquing them in a string map.
|
|
|
|
namestr = MangleName(name, TypeVec[ti], ck);
|
|
|
|
if (EmittedMap.count(namestr))
|
2011-10-06 21:03:08 +08:00
|
|
|
continue;
|
2013-08-01 17:23:19 +08:00
|
|
|
EmittedMap[namestr] = OpNone;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Calculate the index of the immediate that should be range checked.
|
|
|
|
unsigned immidx = 0;
|
|
|
|
|
|
|
|
// Builtins that return a struct of multiple vectors have an extra
|
|
|
|
// leading arg for the struct return.
|
2013-11-14 09:57:55 +08:00
|
|
|
if (IsMultiVecProto(Proto[0]))
|
2013-08-01 17:23:19 +08:00
|
|
|
++immidx;
|
|
|
|
|
|
|
|
// Add one to the index for each argument until we reach the immediate
|
|
|
|
// to be checked. Structs of vectors are passed as multiple arguments.
|
|
|
|
for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
|
|
|
|
switch (Proto[ii]) {
|
|
|
|
default:
|
|
|
|
immidx += 1;
|
|
|
|
break;
|
|
|
|
case '2':
|
2013-11-14 09:57:55 +08:00
|
|
|
case 'B':
|
2013-08-01 17:23:19 +08:00
|
|
|
immidx += 2;
|
|
|
|
break;
|
|
|
|
case '3':
|
2013-11-14 09:57:55 +08:00
|
|
|
case 'C':
|
2013-08-01 17:23:19 +08:00
|
|
|
immidx += 3;
|
|
|
|
break;
|
|
|
|
case '4':
|
2013-11-14 09:57:55 +08:00
|
|
|
case 'D':
|
2013-08-01 17:23:19 +08:00
|
|
|
immidx += 4;
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
ie = ii + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (isA64RangeCheck)
|
|
|
|
OS << "case AArch64::BI__builtin_neon_";
|
|
|
|
else
|
|
|
|
OS << "case ARM::BI__builtin_neon_";
|
|
|
|
OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
|
|
|
|
<< rangestr << "; break;\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
OS << "#endif\n\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Generate the ARM and AArch64 overloaded type checking code for
|
|
|
|
/// SemaChecking.cpp, checking for unique builtin declarations.
|
|
|
|
void
|
|
|
|
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
|
|
|
|
StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64TypeCheck) {
|
|
|
|
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
|
|
|
|
StringMap<OpKind> EmittedMap;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Generate the overloaded type checking code for SemaChecking.cpp
|
2013-08-01 17:23:19 +08:00
|
|
|
if (isA64TypeCheck)
|
|
|
|
OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
|
|
|
|
else
|
|
|
|
OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
|
|
|
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
|
|
|
|
if (k != OpNone)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
|
|
|
std::string Types = R->getValueAsString("Types");
|
|
|
|
std::string name = R->getValueAsString("Name");
|
2013-08-29 15:55:15 +08:00
|
|
|
std::string Rename = name + "@" + Proto;
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Functions with 'a' (the splat code) in the type prototype should not get
|
|
|
|
// their own builtin as they use the non-splat variant.
|
|
|
|
if (Proto.find('a') != std::string::npos)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Functions which have a scalar argument cannot be overloaded, no need to
|
|
|
|
// check them if we are emitting the type checking code.
|
2013-10-05 16:22:55 +08:00
|
|
|
if (ProtoHasScalar(Proto))
|
2011-10-06 21:03:08 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
SmallVector<StringRef, 16> TypeVec;
|
|
|
|
ParseTypes(R, Types, TypeVec);
|
|
|
|
|
|
|
|
if (R->getSuperClasses().size() < 2)
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError(R->getLoc(), "Builtin has no class kind");
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Do not include AArch64 type checks if not generating code for AArch64.
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (!isA64TypeCheck && isA64)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Include ARM type check in AArch64 but only if ARM intrinsics
|
|
|
|
// are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
|
|
|
|
// redefined in AArch64 to handle an additional 2 x f64 type.
|
|
|
|
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
|
2013-08-29 15:55:15 +08:00
|
|
|
if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
|
|
|
|
ClassKind &A64CK = A64IntrinsicMap[Rename];
|
2013-08-01 17:23:19 +08:00
|
|
|
if (A64CK == ck && ck != ClassNone)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
int si = -1, qi = -1;
|
2012-08-14 09:28:02 +08:00
|
|
|
uint64_t mask = 0, qmask = 0;
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
|
|
|
|
// Generate the switch case(s) for this builtin for the type validation.
|
|
|
|
bool quad = false, poly = false, usgn = false;
|
|
|
|
(void) ClassifyType(TypeVec[ti], quad, poly, usgn);
|
|
|
|
|
|
|
|
if (quad) {
|
|
|
|
qi = ti;
|
2012-08-14 09:28:02 +08:00
|
|
|
qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
|
2011-10-06 21:03:08 +08:00
|
|
|
} else {
|
|
|
|
si = ti;
|
2012-08-14 09:28:02 +08:00
|
|
|
mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
2011-11-17 05:32:23 +08:00
|
|
|
|
|
|
|
// Check if the builtin function has a pointer or const pointer argument.
|
|
|
|
int PtrArgNum = -1;
|
|
|
|
bool HasConstPtr = false;
|
|
|
|
for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
|
|
|
|
char ArgType = Proto[arg];
|
|
|
|
if (ArgType == 'c') {
|
|
|
|
HasConstPtr = true;
|
|
|
|
PtrArgNum = arg - 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ArgType == 'p') {
|
|
|
|
PtrArgNum = arg - 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// For sret builtins, adjust the pointer argument index.
|
2013-11-14 09:57:55 +08:00
|
|
|
if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
|
2011-11-17 05:32:23 +08:00
|
|
|
PtrArgNum += 1;
|
|
|
|
|
2011-12-20 14:16:48 +08:00
|
|
|
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
|
|
|
|
// and vst1_lane intrinsics. Using a pointer to the vector element
|
|
|
|
// type with one of those operations causes codegen to select an aligned
|
|
|
|
// load/store instruction. If you want an unaligned operation,
|
|
|
|
// the pointer argument needs to have less alignment than element type,
|
|
|
|
// so just accept any pointer type.
|
|
|
|
if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
|
|
|
|
PtrArgNum = -1;
|
|
|
|
HasConstPtr = false;
|
|
|
|
}
|
|
|
|
|
2011-11-08 13:04:11 +08:00
|
|
|
if (mask) {
|
2013-08-01 17:23:19 +08:00
|
|
|
if (isA64TypeCheck)
|
|
|
|
OS << "case AArch64::BI__builtin_neon_";
|
|
|
|
else
|
|
|
|
OS << "case ARM::BI__builtin_neon_";
|
|
|
|
OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
|
|
|
|
<< "0x" << utohexstr(mask) << "ULL";
|
2011-11-17 05:32:23 +08:00
|
|
|
if (PtrArgNum >= 0)
|
|
|
|
OS << "; PtrArgNum = " << PtrArgNum;
|
2011-11-08 13:04:11 +08:00
|
|
|
if (HasConstPtr)
|
|
|
|
OS << "; HasConstPtr = true";
|
|
|
|
OS << "; break;\n";
|
|
|
|
}
|
|
|
|
if (qmask) {
|
2013-08-01 17:23:19 +08:00
|
|
|
if (isA64TypeCheck)
|
|
|
|
OS << "case AArch64::BI__builtin_neon_";
|
|
|
|
else
|
|
|
|
OS << "case ARM::BI__builtin_neon_";
|
|
|
|
OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
|
|
|
|
<< "0x" << utohexstr(qmask) << "ULL";
|
2011-11-17 05:32:23 +08:00
|
|
|
if (PtrArgNum >= 0)
|
|
|
|
OS << "; PtrArgNum = " << PtrArgNum;
|
2011-11-08 13:04:11 +08:00
|
|
|
if (HasConstPtr)
|
|
|
|
OS << "; HasConstPtr = true";
|
|
|
|
OS << "; break;\n";
|
|
|
|
}
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
OS << "#endif\n\n";
|
2013-08-01 17:23:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
|
|
|
|
/// declaration of builtins, checking for unique builtin declarations.
|
|
|
|
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
|
|
|
|
StringMap<ClassKind> &A64IntrinsicMap,
|
|
|
|
bool isA64GenBuiltinDef) {
|
|
|
|
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
|
|
|
|
StringMap<OpKind> EmittedMap;
|
|
|
|
|
|
|
|
// Generate BuiltinsARM.def and BuiltinsAArch64.def
|
|
|
|
if (isA64GenBuiltinDef)
|
|
|
|
OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
|
|
|
|
else
|
|
|
|
OS << "#ifdef GET_NEON_BUILTINS\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
|
|
|
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
|
|
|
|
if (k != OpNone)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string name = R->getValueAsString("Name");
|
2013-08-29 15:55:15 +08:00
|
|
|
std::string Rename = name + "@" + Proto;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Functions with 'a' (the splat code) in the type prototype should not get
|
|
|
|
// their own builtin as they use the non-splat variant.
|
|
|
|
if (Proto.find('a') != std::string::npos)
|
|
|
|
continue;
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string Types = R->getValueAsString("Types");
|
2011-10-06 21:03:08 +08:00
|
|
|
SmallVector<StringRef, 16> TypeVec;
|
|
|
|
ParseTypes(R, Types, TypeVec);
|
|
|
|
|
|
|
|
if (R->getSuperClasses().size() < 2)
|
2012-10-26 00:37:08 +08:00
|
|
|
PrintFatalError(R->getLoc(), "Builtin has no class kind");
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Do not include AArch64 BUILTIN() macros if not generating
|
|
|
|
// code for AArch64
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (!isA64GenBuiltinDef && isA64)
|
|
|
|
continue;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
|
|
|
|
// are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
|
|
|
|
// redefined in AArch64 to handle an additional 2 x f64 type.
|
2013-08-29 15:55:15 +08:00
|
|
|
if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
|
|
|
|
ClassKind &A64CK = A64IntrinsicMap[Rename];
|
2013-08-01 17:23:19 +08:00
|
|
|
if (A64CK == ck && ck != ClassNone)
|
2011-10-06 21:03:08 +08:00
|
|
|
continue;
|
2013-08-01 17:23:19 +08:00
|
|
|
}
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
|
|
|
|
// Generate the declaration for this builtin, ensuring
|
|
|
|
// that each unique BUILTIN() macro appears only once in the output
|
|
|
|
// stream.
|
|
|
|
std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
|
|
|
|
if (EmittedMap.count(bd))
|
|
|
|
continue;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
EmittedMap[bd] = OpNone;
|
|
|
|
OS << bd << "\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
OS << "#endif\n\n";
|
|
|
|
}
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
/// runHeader - Emit a file with sections defining:
|
|
|
|
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
|
|
|
|
/// 2. the SemaChecking code for the type overload checking.
|
|
|
|
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
|
|
|
|
void NeonEmitter::runHeader(raw_ostream &OS) {
|
|
|
|
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
|
|
|
|
|
|
|
|
// build a map of AArch64 intriniscs to be used in uniqueness checks.
|
|
|
|
StringMap<ClassKind> A64IntrinsicMap;
|
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
|
|
|
|
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
if (!isA64)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ClassKind CK = ClassNone;
|
|
|
|
if (R->getSuperClasses().size() >= 2)
|
|
|
|
CK = ClassMap[R->getSuperClasses()[1]];
|
|
|
|
|
|
|
|
std::string Name = R->getValueAsString("Name");
|
2013-08-29 15:55:15 +08:00
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
|
|
|
std::string Rename = Name + "@" + Proto;
|
|
|
|
if (A64IntrinsicMap.count(Rename))
|
2013-08-01 17:23:19 +08:00
|
|
|
continue;
|
2013-08-29 15:55:15 +08:00
|
|
|
A64IntrinsicMap[Rename] = CK;
|
2013-08-01 17:23:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Generate BuiltinsARM.def for ARM
|
|
|
|
genBuiltinsDef(OS, A64IntrinsicMap, false);
|
|
|
|
|
|
|
|
// Generate BuiltinsAArch64.def for AArch64
|
|
|
|
genBuiltinsDef(OS, A64IntrinsicMap, true);
|
|
|
|
|
|
|
|
// Generate ARM overloaded type checking code for SemaChecking.cpp
|
|
|
|
genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
|
|
|
|
|
|
|
|
// Generate AArch64 overloaded type checking code for SemaChecking.cpp
|
|
|
|
genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
|
|
|
|
|
|
|
|
// Generate ARM range checking code for shift/lane immediates.
|
|
|
|
genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
|
|
|
|
|
|
|
|
// Generate the AArch64 range checking code for shift/lane immediates.
|
|
|
|
genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
|
|
|
|
}
|
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
/// GenTest - Write out a test for the intrinsic specified by the name and
|
|
|
|
/// type strings, including the embedded patterns for FileCheck to match.
|
|
|
|
static std::string GenTest(const std::string &name,
|
|
|
|
const std::string &proto,
|
|
|
|
StringRef outTypeStr, StringRef inTypeStr,
|
2013-04-17 06:48:52 +08:00
|
|
|
bool isShift, bool isHiddenLOp,
|
2013-08-01 17:23:19 +08:00
|
|
|
ClassKind ck, const std::string &InstName,
|
2013-11-16 07:33:31 +08:00
|
|
|
bool isA64,
|
|
|
|
std::string & testFuncProto) {
|
2011-10-06 21:03:08 +08:00
|
|
|
assert(!proto.empty() && "");
|
|
|
|
std::string s;
|
|
|
|
|
|
|
|
// Function name with type suffix
|
|
|
|
std::string mangledName = MangleName(name, outTypeStr, ClassS);
|
|
|
|
if (outTypeStr != inTypeStr) {
|
|
|
|
// If the input type is different (e.g., for vreinterpret), append a suffix
|
|
|
|
// for the input type. String off a "Q" (quad) prefix so that MangleName
|
|
|
|
// does not insert another "q" in the name.
|
|
|
|
unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
|
|
|
|
StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
|
|
|
|
mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
|
|
|
|
}
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
// todo: GenerateChecksForIntrinsic does not generate CHECK
|
|
|
|
// for aarch64 instructions yet
|
2013-04-17 07:00:26 +08:00
|
|
|
std::vector<std::string> FileCheckPatterns;
|
2013-08-01 17:23:19 +08:00
|
|
|
if (!isA64) {
|
|
|
|
GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
|
|
|
|
isHiddenLOp, FileCheckPatterns);
|
|
|
|
s+= "// CHECK_ARM: test_" + mangledName + "\n";
|
|
|
|
}
|
|
|
|
s += "// CHECK_AARCH64: test_" + mangledName + "\n";
|
2013-04-17 07:00:26 +08:00
|
|
|
|
2011-10-06 21:03:08 +08:00
|
|
|
// Emit the FileCheck patterns.
|
2013-04-17 07:00:26 +08:00
|
|
|
// If for any reason we do not want to emit a check, mangledInst
|
|
|
|
// will be the empty string.
|
|
|
|
if (FileCheckPatterns.size()) {
|
|
|
|
for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
|
|
|
|
e = FileCheckPatterns.end();
|
|
|
|
i != e;
|
|
|
|
++i) {
|
2013-08-01 17:23:19 +08:00
|
|
|
s += "// CHECK_ARM: " + *i + "\n";
|
2013-04-17 07:00:26 +08:00
|
|
|
}
|
|
|
|
}
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
// Emit the start of the test function.
|
2013-08-01 17:23:19 +08:00
|
|
|
|
|
|
|
testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
|
2011-10-06 21:03:08 +08:00
|
|
|
char arg = 'a';
|
|
|
|
std::string comma;
|
|
|
|
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
|
|
|
|
// Do not create arguments for values that must be immediate constants.
|
|
|
|
if (proto[i] == 'i')
|
|
|
|
continue;
|
2013-08-01 17:23:19 +08:00
|
|
|
testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
|
|
|
|
testFuncProto.push_back(arg);
|
2011-10-06 21:03:08 +08:00
|
|
|
comma = ", ";
|
|
|
|
}
|
2013-08-01 17:23:19 +08:00
|
|
|
testFuncProto += ")";
|
|
|
|
|
|
|
|
s+= testFuncProto;
|
|
|
|
s+= " {\n ";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
if (proto[0] != 'v')
|
|
|
|
s += "return ";
|
|
|
|
s += mangledName + "(";
|
|
|
|
arg = 'a';
|
|
|
|
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
|
|
|
|
if (proto[i] == 'i') {
|
|
|
|
// For immediate operands, test the maximum value.
|
|
|
|
if (isShift)
|
|
|
|
s += "1"; // FIXME
|
|
|
|
else
|
|
|
|
// The immediate generally refers to a lane in the preceding argument.
|
|
|
|
s += utostr(RangeFromType(proto[i-1], inTypeStr));
|
|
|
|
} else {
|
|
|
|
s.push_back(arg);
|
|
|
|
}
|
|
|
|
if ((i + 1) < e)
|
|
|
|
s += ", ";
|
|
|
|
}
|
|
|
|
s += ");\n}\n\n";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
/// Write out all intrinsic tests for the specified target, checking
|
|
|
|
/// for intrinsic test uniqueness.
|
|
|
|
void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
|
|
|
|
bool isA64GenTest) {
|
|
|
|
if (isA64GenTest)
|
|
|
|
OS << "#ifdef __aarch64__\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
|
2013-08-01 17:23:19 +08:00
|
|
|
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
|
|
|
|
Record *R = RV[i];
|
|
|
|
std::string name = R->getValueAsString("Name");
|
|
|
|
std::string Proto = R->getValueAsString("Prototype");
|
|
|
|
std::string Types = R->getValueAsString("Types");
|
|
|
|
bool isShift = R->getValueAsBit("isShift");
|
2013-04-17 06:48:52 +08:00
|
|
|
std::string InstName = R->getValueAsString("InstName");
|
|
|
|
bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
|
2013-08-01 17:23:19 +08:00
|
|
|
bool isA64 = R->getValueAsBit("isA64");
|
|
|
|
|
|
|
|
// do not include AArch64 intrinsic test if not generating
|
|
|
|
// code for AArch64
|
|
|
|
if (!isA64GenTest && isA64)
|
|
|
|
continue;
|
2011-10-06 21:03:08 +08:00
|
|
|
|
|
|
|
SmallVector<StringRef, 16> TypeVec;
|
|
|
|
ParseTypes(R, Types, TypeVec);
|
|
|
|
|
2013-04-17 06:48:52 +08:00
|
|
|
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
|
2011-10-06 21:03:08 +08:00
|
|
|
OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
|
2012-05-10 02:17:30 +08:00
|
|
|
if (kind == OpUnavailable)
|
|
|
|
continue;
|
2011-10-06 21:03:08 +08:00
|
|
|
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
|
|
|
|
if (kind == OpReinterpret) {
|
|
|
|
bool outQuad = false;
|
|
|
|
bool dummy = false;
|
|
|
|
(void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
|
|
|
|
for (unsigned srcti = 0, srcte = TypeVec.size();
|
|
|
|
srcti != srcte; ++srcti) {
|
|
|
|
bool inQuad = false;
|
|
|
|
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
|
|
|
|
if (srcti == ti || inQuad != outQuad)
|
|
|
|
continue;
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string testFuncProto;
|
|
|
|
std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
|
|
|
|
isShift, isHiddenLOp, ck, InstName, isA64,
|
|
|
|
testFuncProto);
|
|
|
|
if (EmittedMap.count(testFuncProto))
|
|
|
|
continue;
|
|
|
|
EmittedMap[testFuncProto] = kind;
|
|
|
|
OS << s << "\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
} else {
|
2013-08-01 17:23:19 +08:00
|
|
|
std::string testFuncProto;
|
|
|
|
std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
|
|
|
|
isHiddenLOp, ck, InstName, isA64, testFuncProto);
|
|
|
|
if (EmittedMap.count(testFuncProto))
|
|
|
|
continue;
|
|
|
|
EmittedMap[testFuncProto] = kind;
|
|
|
|
OS << s << "\n";
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-08-01 17:23:19 +08:00
|
|
|
|
|
|
|
if (isA64GenTest)
|
|
|
|
OS << "#endif\n";
|
|
|
|
}
|
|
|
|
/// runTests - Write out a complete set of tests for all of the Neon
|
|
|
|
/// intrinsics.
|
|
|
|
void NeonEmitter::runTests(raw_ostream &OS) {
|
|
|
|
OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
|
|
|
|
"apcs-gnu\\\n"
|
|
|
|
"// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
|
|
|
|
"// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
|
|
|
|
"\n"
|
|
|
|
"// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
|
|
|
|
"// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
|
|
|
|
"// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
|
|
|
|
"\n"
|
|
|
|
"// REQUIRES: long_tests\n"
|
|
|
|
"\n"
|
|
|
|
"#include <arm_neon.h>\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
// ARM tests must be emitted before AArch64 tests to ensure
|
|
|
|
// tests for intrinsics that are common to ARM and AArch64
|
|
|
|
// appear only once in the output stream.
|
|
|
|
// The check for uniqueness is done in genTargetTest.
|
|
|
|
StringMap<OpKind> EmittedMap;
|
|
|
|
|
|
|
|
genTargetTest(OS, EmittedMap, false);
|
|
|
|
|
|
|
|
genTargetTest(OS, EmittedMap, true);
|
2011-10-06 21:03:08 +08:00
|
|
|
}
|
|
|
|
|
2012-06-13 13:12:41 +08:00
|
|
|
namespace clang {
|
|
|
|
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
|
|
|
|
NeonEmitter(Records).run(OS);
|
|
|
|
}
|
|
|
|
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
|
|
|
|
NeonEmitter(Records).runHeader(OS);
|
|
|
|
}
|
|
|
|
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
|
|
|
|
NeonEmitter(Records).runTests(OS);
|
|
|
|
}
|
|
|
|
} // End namespace clang
|