forked from OSchip/llvm-project
Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors.
The functions are like: vst1_s8_x2 ... llvm-svn: 194991
This commit is contained in:
parent
5a4e4e107d
commit
5e4ce1ae9d
|
@ -543,6 +543,20 @@ def ST3 : WInst<"vst3", "vp3",
|
|||
def ST4 : WInst<"vst4", "vp4",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
|
||||
|
||||
def LD1_X2 : WInst<"vld1_x2", "2c",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
def LD3_x3 : WInst<"vld1_x3", "3c",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
def LD4_x4 : WInst<"vld1_x4", "4c",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
|
||||
def ST1_X2 : WInst<"vst1_x2", "vp2",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
def ST1_X3 : WInst<"vst1_x3", "vp3",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
def ST1_X4 : WInst<"vst1_x4", "vp4",
|
||||
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Addition
|
||||
// With additional Qd type.
|
||||
|
|
|
@ -2750,7 +2750,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
|
||||
SmallVector<Value *, 4> Ops;
|
||||
llvm::Value *Align = 0; // Alignment for load/store
|
||||
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
|
||||
if (i == 0) {
|
||||
switch (BuiltinID) {
|
||||
case AArch64::BI__builtin_neon_vst1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x2_v:
|
||||
case AArch64::BI__builtin_neon_vst1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x3_v:
|
||||
case AArch64::BI__builtin_neon_vst1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x4_v:
|
||||
// Get the alignment for the argument in addition to the value;
|
||||
// we'll use it later.
|
||||
std::pair<llvm::Value *, unsigned> Src =
|
||||
EmitPointerWithAlignment(E->getArg(0));
|
||||
Ops.push_back(Src.first);
|
||||
Align = Builder.getInt32(Src.second);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (i == 1) {
|
||||
switch (BuiltinID) {
|
||||
case AArch64::BI__builtin_neon_vld1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x2_v:
|
||||
case AArch64::BI__builtin_neon_vld1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x3_v:
|
||||
case AArch64::BI__builtin_neon_vld1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x4_v:
|
||||
// Get the alignment for the argument in addition to the value;
|
||||
// we'll use it later.
|
||||
std::pair<llvm::Value *, unsigned> Src =
|
||||
EmitPointerWithAlignment(E->getArg(1));
|
||||
Ops.push_back(Src.first);
|
||||
Align = Builder.getInt32(Src.second);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
||||
}
|
||||
|
||||
|
@ -3084,6 +3119,57 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
|
||||
case AArch64::BI__builtin_neon_vst4q_v:
|
||||
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
|
||||
case AArch64::BI__builtin_neon_vld1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x2_v:
|
||||
case AArch64::BI__builtin_neon_vld1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x3_v:
|
||||
case AArch64::BI__builtin_neon_vld1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x4_v: {
|
||||
unsigned Int;
|
||||
switch (BuiltinID) {
|
||||
case AArch64::BI__builtin_neon_vld1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x2_v:
|
||||
Int = Intrinsic::aarch64_neon_vld1x2;
|
||||
break;
|
||||
case AArch64::BI__builtin_neon_vld1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x3_v:
|
||||
Int = Intrinsic::aarch64_neon_vld1x3;
|
||||
break;
|
||||
case AArch64::BI__builtin_neon_vld1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vld1q_x4_v:
|
||||
Int = Intrinsic::aarch64_neon_vld1x4;
|
||||
break;
|
||||
}
|
||||
Function *F = CGM.getIntrinsic(Int, Ty);
|
||||
Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
|
||||
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
||||
return Builder.CreateStore(Ops[1], Ops[0]);
|
||||
}
|
||||
case AArch64::BI__builtin_neon_vst1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x2_v:
|
||||
case AArch64::BI__builtin_neon_vst1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x3_v:
|
||||
case AArch64::BI__builtin_neon_vst1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x4_v: {
|
||||
Ops.push_back(Align);
|
||||
unsigned Int;
|
||||
switch (BuiltinID) {
|
||||
case AArch64::BI__builtin_neon_vst1_x2_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x2_v:
|
||||
Int = Intrinsic::aarch64_neon_vst1x2;
|
||||
break;
|
||||
case AArch64::BI__builtin_neon_vst1_x3_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x3_v:
|
||||
Int = Intrinsic::aarch64_neon_vst1x3;
|
||||
break;
|
||||
case AArch64::BI__builtin_neon_vst1_x4_v:
|
||||
case AArch64::BI__builtin_neon_vst1q_x4_v:
|
||||
Int = Intrinsic::aarch64_neon_vst1x4;
|
||||
break;
|
||||
}
|
||||
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
|
||||
}
|
||||
|
||||
// Crypto
|
||||
case AArch64::BI__builtin_neon_vaeseq_v:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,13 @@
|
|||
// REQUIRES: aarch64-registered-target
|
||||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
|
||||
// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
|
||||
|
||||
// Test whether arm_neon.h can be used in .cpp file.
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
poly64x1_t test_vld1_p64(poly64_t const * ptr) {
|
||||
// CHECK: test_vld1_p64
|
||||
return vld1_p64(ptr);
|
||||
// CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
|
||||
}
|
|
@ -881,6 +881,16 @@ static char Insert_BHSD_Suffix(StringRef typestr){
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool endsWith_xN(std::string const &name) {
|
||||
if (name.length() > 3) {
|
||||
if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
|
||||
name.compare(name.length() - 3, 3, "_x3") == 0 ||
|
||||
name.compare(name.length() - 3, 3, "_x4") == 0)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// MangleName - Append a type or width suffix to a base neon function name,
|
||||
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
|
||||
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
|
||||
|
@ -898,6 +908,10 @@ static std::string MangleName(const std::string &name, StringRef typestr,
|
|||
std::string s = name;
|
||||
|
||||
if (typeCode.size() > 0) {
|
||||
// If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
|
||||
if (endsWith_xN(s))
|
||||
s.insert(s.length() - 3, "_" + typeCode);
|
||||
else
|
||||
s += "_" + typeCode;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue