forked from OSchip/llvm-project
Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions
llvm-svn: 188452
This commit is contained in:
parent
cd8b02dce3
commit
4efa1402fe
|
@ -152,6 +152,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
|
|||
// size modifiers:
|
||||
// U: unsigned
|
||||
// Q: 128b
|
||||
// H: 128b without mangling 'q'
|
||||
// P: polynomial
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -551,6 +552,18 @@ def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">;
|
|||
// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
|
||||
def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Shifts by constant
|
||||
let isShift = 1 in {
|
||||
// Left shift long high
|
||||
def SHLL_HIGH_N : SInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi">;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Converting vectors
|
||||
def VMOVL_HIGH : SInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi">;
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Scalar Arithmetic
|
||||
|
||||
|
|
|
@ -1620,6 +1620,37 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
|
|||
llvm_unreachable("Invalid NeonTypeFlags element type!");
|
||||
}
|
||||
|
||||
/// EmitExtendedSHL - Emit a widening left shift: extend the narrow source
/// vector to \p VTy and shift it left by the amount in Ops[1].
///
/// \param CGF    Function being emitted; all IR is created through its builder.
/// \param Ops    Ops[0] is the source vector, Ops[1] the shift amount. Both
///               entries are overwritten with the intermediate values.
/// \param VTy    The (wide) result vector type.
/// \param usgn   Zero-extend the source when true, sign-extend otherwise.
/// \param isHigh When true, the source has twice as many lanes as \p VTy and
///               only its upper half of lanes is widened; when false the
///               source has the same lane count as \p VTy with half-width
///               elements.
/// \returns The shifted vector value, named "vshl_n".
static Value *EmitExtendedSHL(CodeGenFunction &CGF,
                              SmallVectorImpl<Value*> &Ops,
                              llvm::VectorType *VTy, bool usgn, bool isHigh) {
  // Emit through CGF's own builder. Taking a by-value copy into a fixed
  // IRBuilder<> type only works when the function's builder happens to have
  // exactly that type, and it splits insertion state across two builders.
  if (isHigh) {
    unsigned NumElts = VTy->getNumElements();
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(VTy->getContext(), EltBits / 2);
    // The source operand type has twice as many elements of half the size.
    llvm::Type *SrcTy = llvm::VectorType::get(EltTy, NumElts * 2);
    // Shuffle mask selecting lanes [NumElts, 2 * NumElts), i.e. the upper
    // half of the source vector.
    SmallVector<Constant*, 8> Indices;
    for (unsigned i = 0; i != NumElts; i++)
      Indices.push_back(CGF.Builder.getInt32(i + NumElts));
    Value *SV = llvm::ConstantVector::get(Indices);
    Value *Undef = llvm::UndefValue::get(SrcTy);
    Ops[0] = CGF.Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[0] = CGF.Builder.CreateShuffleVector(Ops[0], Undef, SV);
  } else {
    // Same lane count as the result, each lane half as wide.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = CGF.Builder.CreateBitCast(Ops[0], SrcTy);
  }

  // Widen to the result type before shifting so no bits are lost.
  if (usgn)
    Ops[0] = CGF.Builder.CreateZExt(Ops[0], VTy);
  else
    Ops[0] = CGF.Builder.CreateSExt(Ops[0], VTy);
  Ops[1] = CGF.EmitNeonShiftVector(Ops[1], VTy, false);
  return CGF.Builder.CreateShl(Ops[0], Ops[1], "vshl_n");
}
|
||||
|
||||
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
|
||||
unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
|
||||
Value* SV = llvm::ConstantVector::getSplat(nElts, C);
|
||||
|
@ -1862,6 +1893,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
|
||||
case AArch64::BI__builtin_neon_vqrdmulhq_v:
|
||||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
|
||||
case AArch64::BI__builtin_neon_vshl_n_v:
|
||||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
|
||||
case AArch64::BI__builtin_neon_vshlq_n_v:
|
||||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
|
||||
case AArch64::BI__builtin_neon_vmovl_v:
|
||||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
|
||||
case AArch64::BI__builtin_neon_vshll_n_v:
|
||||
return EmitExtendedSHL(*this, Ops, VTy, usgn, false);
|
||||
case AArch64::BI__builtin_neon_vmovl_high_v:
|
||||
Ops.push_back(ConstantInt::get(Int32Ty, 0));
|
||||
case AArch64::BI__builtin_neon_vshll_high_n_v:
|
||||
return EmitExtendedSHL(*this, Ops, VTy, usgn, true);
|
||||
|
||||
// AArch64-only builtins
|
||||
case AArch64::BI__builtin_neon_vfms_v:
|
||||
|
|
|
@ -3021,3 +3021,231 @@ float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
|
|||
// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
}
|
||||
|
||||
int8x8_t test_vshl_n_s8(int8x8_t a) {
|
||||
// CHECK: test_vshl_n_s8
|
||||
return vshl_n_s8(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
|
||||
}
|
||||
|
||||
int16x4_t test_vshl_n_s16(int16x4_t a) {
|
||||
// CHECK: test_vshl_n_s16
|
||||
return vshl_n_s16(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
|
||||
}
|
||||
|
||||
int32x2_t test_vshl_n_s32(int32x2_t a) {
|
||||
// CHECK: test_vshl_n_s32
|
||||
return vshl_n_s32(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
|
||||
}
|
||||
|
||||
int8x16_t test_vshlq_n_s8(int8x16_t a) {
|
||||
// CHECK: test_vshlq_n_s8
|
||||
return vshlq_n_s8(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
|
||||
}
|
||||
|
||||
int16x8_t test_vshlq_n_s16(int16x8_t a) {
|
||||
// CHECK: test_vshlq_n_s16
|
||||
return vshlq_n_s16(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
|
||||
}
|
||||
|
||||
int32x4_t test_vshlq_n_s32(int32x4_t a) {
|
||||
// CHECK: test_vshlq_n_s32
|
||||
return vshlq_n_s32(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
|
||||
}
|
||||
|
||||
int64x2_t test_vshlq_n_s64(int64x2_t a) {
|
||||
// CHECK: test_vshlq_n_s64
|
||||
return vshlq_n_s64(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
|
||||
}
|
||||
|
||||
int8x8_t test_vshl_n_u8(int8x8_t a) {
|
||||
// CHECK: test_vshl_n_u8
|
||||
return vshl_n_u8(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
|
||||
}
|
||||
|
||||
int16x4_t test_vshl_n_u16(int16x4_t a) {
|
||||
// CHECK: test_vshl_n_u16
|
||||
return vshl_n_u16(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
|
||||
}
|
||||
|
||||
int32x2_t test_vshl_n_u32(int32x2_t a) {
|
||||
// CHECK: test_vshl_n_u32
|
||||
return vshl_n_u32(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
|
||||
}
|
||||
|
||||
int8x16_t test_vshlq_n_u8(int8x16_t a) {
|
||||
// CHECK: test_vshlq_n_u8
|
||||
return vshlq_n_u8(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
|
||||
}
|
||||
|
||||
int16x8_t test_vshlq_n_u16(int16x8_t a) {
|
||||
// CHECK: test_vshlq_n_u16
|
||||
return vshlq_n_u16(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
|
||||
}
|
||||
|
||||
int32x4_t test_vshlq_n_u32(int32x4_t a) {
|
||||
// CHECK: test_vshlq_n_u32
|
||||
return vshlq_n_u32(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
|
||||
}
|
||||
|
||||
int64x2_t test_vshlq_n_u64(int64x2_t a) {
|
||||
// CHECK: test_vshlq_n_u64
|
||||
return vshlq_n_u64(a, 3);
|
||||
// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
|
||||
}
|
||||
|
||||
int16x8_t test_vshll_n_s8(int8x8_t a) {
|
||||
// CHECK: test_vshll_n_s8
|
||||
return vshll_n_s8(a, 3);
|
||||
// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
|
||||
}
|
||||
|
||||
int32x4_t test_vshll_n_s16(int16x4_t a) {
|
||||
// CHECK: test_vshll_n_s16
|
||||
return vshll_n_s16(a, 9);
|
||||
// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
|
||||
}
|
||||
|
||||
int64x2_t test_vshll_n_s32(int32x2_t a) {
|
||||
// CHECK: test_vshll_n_s32
|
||||
return vshll_n_s32(a, 19);
|
||||
// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
|
||||
}
|
||||
|
||||
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
|
||||
// CHECK: test_vshll_n_u8
|
||||
return vshll_n_u8(a, 3);
|
||||
// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
|
||||
}
|
||||
|
||||
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
|
||||
// CHECK: test_vshll_n_u16
|
||||
return vshll_n_u16(a, 9);
|
||||
// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
|
||||
}
|
||||
|
||||
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
|
||||
// CHECK: test_vshll_n_u32
|
||||
return vshll_n_u32(a, 19);
|
||||
// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
|
||||
}
|
||||
|
||||
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
|
||||
// CHECK: test_vshll_high_n_s8
|
||||
return vshll_high_n_s8(a, 3);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
|
||||
}
|
||||
|
||||
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
|
||||
// CHECK: test_vshll_high_n_s16
|
||||
return vshll_high_n_s16(a, 9);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
|
||||
}
|
||||
|
||||
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
|
||||
// CHECK: test_vshll_high_n_s32
|
||||
return vshll_high_n_s32(a, 19);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
|
||||
}
|
||||
|
||||
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
|
||||
// CHECK: test_vshll_high_n_u8
|
||||
return vshll_high_n_u8(a, 3);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
|
||||
}
|
||||
|
||||
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
|
||||
// CHECK: test_vshll_high_n_u16
|
||||
return vshll_high_n_u16(a, 9);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
|
||||
}
|
||||
|
||||
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
|
||||
// CHECK: test_vshll_high_n_u32
|
||||
return vshll_high_n_u32(a, 19);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
|
||||
}
|
||||
|
||||
int16x8_t test_vmovl_s8(int8x8_t a) {
|
||||
// CHECK: test_vmovl_s8
|
||||
return vmovl_s8(a);
|
||||
// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
|
||||
}
|
||||
|
||||
int32x4_t test_vmovl_s16(int16x4_t a) {
|
||||
// CHECK: test_vmovl_s16
|
||||
return vmovl_s16(a);
|
||||
// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
|
||||
}
|
||||
|
||||
int64x2_t test_vmovl_s32(int32x2_t a) {
|
||||
// CHECK: test_vmovl_s32
|
||||
return vmovl_s32(a);
|
||||
// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
|
||||
}
|
||||
|
||||
uint16x8_t test_vmovl_u8(uint8x8_t a) {
|
||||
// CHECK: test_vmovl_u8
|
||||
return vmovl_u8(a);
|
||||
// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
|
||||
}
|
||||
|
||||
uint32x4_t test_vmovl_u16(uint16x4_t a) {
|
||||
// CHECK: test_vmovl_u16
|
||||
return vmovl_u16(a);
|
||||
// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
|
||||
}
|
||||
|
||||
uint64x2_t test_vmovl_u32(uint32x2_t a) {
|
||||
// CHECK: test_vmovl_u32
|
||||
return vmovl_u32(a);
|
||||
// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
|
||||
}
|
||||
|
||||
int16x8_t test_vmovl_high_s8(int8x16_t a) {
|
||||
// CHECK: test_vmovl_high_s8
|
||||
return vmovl_high_s8(a);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
|
||||
}
|
||||
|
||||
int32x4_t test_vmovl_high_s16(int16x8_t a) {
|
||||
// CHECK: test_vmovl_high_s16
|
||||
return vmovl_high_s16(a);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
|
||||
}
|
||||
|
||||
int64x2_t test_vmovl_high_s32(int32x4_t a) {
|
||||
// CHECK: test_vmovl_high_s32
|
||||
return vmovl_high_s32(a);
|
||||
// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
|
||||
}
|
||||
|
||||
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
|
||||
// CHECK: test_vmovl_high_u8
|
||||
return vmovl_high_u8(a);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
|
||||
}
|
||||
|
||||
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
|
||||
// CHECK: test_vmovl_high_u16
|
||||
return vmovl_high_u16(a);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
|
||||
}
|
||||
|
||||
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
|
||||
// CHECK: test_vmovl_high_u32
|
||||
return vmovl_high_u32(a);
|
||||
// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
|
||||
}
|
||||
|
||||
|
|
|
@ -263,7 +263,8 @@ static void ParseTypes(Record *r, std::string &s,
|
|||
int len = 0;
|
||||
|
||||
for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
|
||||
if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
|
||||
if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
|
||||
|| data[len] == 'H')
|
||||
continue;
|
||||
|
||||
switch (data[len]) {
|
||||
|
@ -325,7 +326,7 @@ static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
|
|||
unsigned off = 0;
|
||||
|
||||
// remember quad.
|
||||
if (ty[off] == 'Q') {
|
||||
if (ty[off] == 'Q' || ty[off] == 'H') {
|
||||
quad = true;
|
||||
++off;
|
||||
}
|
||||
|
@ -689,8 +690,8 @@ static void InstructionTypeCode(const StringRef &typeStr,
|
|||
}
|
||||
|
||||
/// MangleName - Append a type or width suffix to a base neon function name,
|
||||
/// and insert a 'q' in the appropriate location if the operation works on
|
||||
/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
|
||||
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
|
||||
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
|
||||
static std::string MangleName(const std::string &name, StringRef typestr,
|
||||
ClassKind ck) {
|
||||
if (name == "vcvt_f32_f16")
|
||||
|
@ -712,9 +713,9 @@ static std::string MangleName(const std::string &name, StringRef typestr,
|
|||
|
||||
// Insert a 'q' before the first '_' character so that it ends up before
|
||||
// _lane or _n on vector-scalar operations.
|
||||
if (quad) {
|
||||
size_t pos = s.find('_');
|
||||
s = s.insert(pos, "q");
|
||||
if (typestr.startswith("Q")) {
|
||||
size_t pos = s.find('_');
|
||||
s = s.insert(pos, "q");
|
||||
}
|
||||
|
||||
return s;
|
||||
|
|
Loading…
Reference in New Issue