Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors.

The functions have names of the form vst1_s8_x2, ...

llvm-svn: 194991
Hao Liu 2013-11-18 06:33:43 +00:00
parent 5a4e4e107d
commit 5e4ce1ae9d
5 changed files with 1248 additions and 1 deletions
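
For orientation, here is a minimal usage sketch of the intrinsic family this commit wires up. It assumes an AArch64 target whose arm_neon.h already provides the new _x2/_x3/_x4 forms; the snippet is illustrative and not taken from the patch or its tests.

#include <arm_neon.h>

// Copy 16 bytes via two 64-bit d registers using the new multi-vector forms:
// one vld1_x2 load feeding one vst1_x2 store of the same int8x8x2_t struct.
void copy16(int8_t *dst, const int8_t *src) {
  int8x8x2_t pair = vld1_s8_x2(src); // load two consecutive 8-byte vectors
  vst1_s8_x2(dst, pair);             // store them back as one unit
}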

include/clang/Basic/arm_neon.td

@@ -543,6 +543,20 @@ def ST3 : WInst<"vst3", "vp3",
def ST4 : WInst<"vst4", "vp4",
                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD1_X2 : WInst<"vld1_x2", "2c",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD3_x3 : WInst<"vld1_x3", "3c",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD4_x4 : WInst<"vld1_x4", "4c",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST1_X2 : WInst<"vst1_x2", "vp2",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST1_X3 : WInst<"vst1_x3", "vp3",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST1_X4 : WInst<"vst1_x4", "vp4",
                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
////////////////////////////////////////////////////////////////////////////////
// Addition
// With additional Qd type.
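
Reading the WInst prototype strings, 'c' appears to denote a const-pointer parameter, 'p' a plain pointer, 'v' a void return, and the digits a struct of 2/3/4 vectors. On that reading, the defs above should yield C-level signatures like the following in the generated arm_neon.h; this is an inference from the prototype letters, shown for the uint8 q forms only, not copied from the generated header.

#include <arm_neon.h>

// Exercise each new arity once so the expected signatures are visible.
void roundtrip_u8(uint8_t *dst, const uint8_t *src) {
  uint8x16x2_t a = vld1q_u8_x2(src); // LD1_X2: "2c"  -> 2-vector struct from const pointer
  uint8x16x3_t b = vld1q_u8_x3(src); // LD3_x3: "3c"  -> 3-vector struct
  uint8x16x4_t c = vld1q_u8_x4(src); // LD4_x4: "4c"  -> 4-vector struct
  vst1q_u8_x2(dst, a);               // ST1_X2: "vp2" -> void, pointer + 2-vector struct
  vst1q_u8_x3(dst, b);               // ST1_X3: "vp3"
  vst1q_u8_x4(dst, c);               // ST1_X4: "vp4"
}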

lib/CodeGen/CGBuiltin.cpp

@@ -2750,7 +2750,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  }
  SmallVector<Value *, 4> Ops;
  llvm::Value *Align = 0; // Alignment for load/store
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case AArch64::BI__builtin_neon_vst1_x2_v:
      case AArch64::BI__builtin_neon_vst1q_x2_v:
      case AArch64::BI__builtin_neon_vst1_x3_v:
      case AArch64::BI__builtin_neon_vst1q_x3_v:
      case AArch64::BI__builtin_neon_vst1_x4_v:
      case AArch64::BI__builtin_neon_vst1q_x4_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value *, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case AArch64::BI__builtin_neon_vld1_x2_v:
      case AArch64::BI__builtin_neon_vld1q_x2_v:
      case AArch64::BI__builtin_neon_vld1_x3_v:
      case AArch64::BI__builtin_neon_vld1q_x3_v:
      case AArch64::BI__builtin_neon_vld1_x4_v:
      case AArch64::BI__builtin_neon_vld1q_x4_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value *, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }
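
The asymmetry between the i == 0 and i == 1 checks mirrors how the generated header calls these builtins: the store builtins receive the destination pointer as argument 0, while the multi-vector load builtins return their struct through a result pointer passed as argument 0, so the source pointer is argument 1 (this is also why the second hunk below stores Ops[1] through Ops[0]). The loop bound getNumArgs() - 1 skips the trailing NEON type-code constant. A hedged sketch of that calling shape, assuming the usual NEON builtin expansion rather than quoting the generated header:

#include <arm_neon.h>

// Roughly what the loop above is walking (argument layout is an assumption
// based on the existing ARM NEON builtin convention; the trailing type-code
// constant is elided here).
int8x16x2_t sketch(const int8_t *src, int8_t *dst, int8x16x2_t v) {
  // Store: ~ __builtin_neon_vst1q_x2_v(dst, v.val[0], v.val[1], <typecode>)
  //        pointer is argument 0, so the i == 0 branch captures its alignment.
  vst1q_s8_x2(dst, v);
  // Load:  ~ __builtin_neon_vld1q_x2_v(&result, src, <typecode>)
  //        source pointer is argument 1, matching the i == 1 branch.
  return vld1q_s8_x2(src);
}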
@@ -3084,6 +3119,57 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
  case AArch64::BI__builtin_neon_vst4q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
  case AArch64::BI__builtin_neon_vld1_x2_v:
  case AArch64::BI__builtin_neon_vld1q_x2_v:
  case AArch64::BI__builtin_neon_vld1_x3_v:
  case AArch64::BI__builtin_neon_vld1q_x3_v:
  case AArch64::BI__builtin_neon_vld1_x4_v:
  case AArch64::BI__builtin_neon_vld1q_x4_v: {
    unsigned Int;
    switch (BuiltinID) {
    case AArch64::BI__builtin_neon_vld1_x2_v:
    case AArch64::BI__builtin_neon_vld1q_x2_v:
      Int = Intrinsic::aarch64_neon_vld1x2;
      break;
    case AArch64::BI__builtin_neon_vld1_x3_v:
    case AArch64::BI__builtin_neon_vld1q_x3_v:
      Int = Intrinsic::aarch64_neon_vld1x3;
      break;
    case AArch64::BI__builtin_neon_vld1_x4_v:
    case AArch64::BI__builtin_neon_vld1q_x4_v:
      Int = Intrinsic::aarch64_neon_vld1x4;
      break;
    }
    Function *F = CGM.getIntrinsic(Int, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case AArch64::BI__builtin_neon_vst1_x2_v:
  case AArch64::BI__builtin_neon_vst1q_x2_v:
  case AArch64::BI__builtin_neon_vst1_x3_v:
  case AArch64::BI__builtin_neon_vst1q_x3_v:
  case AArch64::BI__builtin_neon_vst1_x4_v:
  case AArch64::BI__builtin_neon_vst1q_x4_v: {
    Ops.push_back(Align);
    unsigned Int;
    switch (BuiltinID) {
    case AArch64::BI__builtin_neon_vst1_x2_v:
    case AArch64::BI__builtin_neon_vst1q_x2_v:
      Int = Intrinsic::aarch64_neon_vst1x2;
      break;
    case AArch64::BI__builtin_neon_vst1_x3_v:
    case AArch64::BI__builtin_neon_vst1q_x3_v:
      Int = Intrinsic::aarch64_neon_vst1x3;
      break;
    case AArch64::BI__builtin_neon_vst1_x4_v:
    case AArch64::BI__builtin_neon_vst1q_x4_v:
      Int = Intrinsic::aarch64_neon_vst1x4;
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
  }
  // Crypto
  case AArch64::BI__builtin_neon_vaeseq_v:

File diff suppressed because it is too large.

@@ -0,0 +1,13 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s

// Test whether arm_neon.h can be used in .cpp file.

#include "arm_neon.h"

poly64x1_t test_vld1_p64(poly64_t const * ptr) {
  // CHECK: test_vld1_p64
  return vld1_p64(ptr);
  // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
}
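
The diff suppressed above as too large is presumably the bulk CodeGen test exercising every new _x2/_x3/_x4 intrinsic. A minimal sketch of what one such check could look like, modeled on the small test just shown; the RUN line and register patterns are assumptions, not copied from the suppressed file.

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
// RUN:   -S -O3 -o - %s | FileCheck %s

#include <arm_neon.h>

int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
  // CHECK: test_vld1q_s8_x2
  // CHECK: ld1 {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  return vld1q_s8_x2(a);
}

void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
  // CHECK: test_vst1q_s8_x2
  // CHECK: st1 {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  vst1q_s8_x2(a, b);
}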

utils/TableGen/NeonEmitter.cpp

@@ -881,6 +881,16 @@ static char Insert_BHSD_Suffix(StringRef typestr){
  return 0;
}

static bool endsWith_xN(std::string const &name) {
  if (name.length() > 3) {
    if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
        name.compare(name.length() - 3, 3, "_x3") == 0 ||
        name.compare(name.length() - 3, 3, "_x4") == 0)
      return true;
  }
  return false;
}
/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
@@ -898,6 +908,10 @@ static std::string MangleName(const std::string &name, StringRef typestr,
  std::string s = name;

  if (typeCode.size() > 0) {
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (endsWith_xN(s))
      s.insert(s.length() - 3, "_" + typeCode);
    else
      s += "_" + typeCode;
  }
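
Without this change, MangleName would append the type suffix at the end and produce names like vld1_x2_s8; the endsWith_xN check instead splices the suffix in front of the _xN tail, giving the ACLE spelling vld1_s8_x2. A small standalone C++ sketch of that behavior (the mangle helper is hypothetical, for illustration only; NeonEmitter itself does more, e.g. the 'q' insertion):

#include <iostream>
#include <string>

// Hypothetical standalone illustration of the suffix splicing above.
static std::string mangle(std::string s, const std::string &typeCode) {
  bool endsWithXN = s.length() > 3 &&
                    (s.compare(s.length() - 3, 3, "_x2") == 0 ||
                     s.compare(s.length() - 3, 3, "_x3") == 0 ||
                     s.compare(s.length() - 3, 3, "_x4") == 0);
  if (endsWithXN)
    s.insert(s.length() - 3, "_" + typeCode); // vld1_x2 -> vld1_s8_x2
  else
    s += "_" + typeCode;                      // vadd    -> vadd_s8
  return s;
}

int main() {
  std::cout << mangle("vld1_x2", "s8") << "\n"; // prints vld1_s8_x2
  std::cout << mangle("vadd", "s8") << "\n";    // prints vadd_s8
}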