Implement AArch64 vector load/store multiple N-element structure class SIMD(lselem).

Including following 14 instructions:
4 ld1 insts: load multiple 1-element structure to sequential 1/2/3/4 registers.
ld2/ld3/ld4: load multiple N-element structure to sequential N registers (N=2,3,4).
4 st1 insts: store multiple 1-element structure from sequential 1/2/3/4 registers.
st2/st3/st4: store multiple N-element structure from sequential N registers (N = 2,3,4).

llvm-svn: 192362
This commit is contained in:
Hao Liu 2013-10-10 17:01:49 +00:00
parent 99eac7ee44
commit 1eade6d927
3 changed files with 1406 additions and 0 deletions

View File

@ -497,6 +497,26 @@ def VFMA : SInst<"vfma", "dddd", "fQf">;
let isA64 = 1 in {
////////////////////////////////////////////////////////////////////////////////
// Load/Store
// With additional QUl, Ql, Qd type.
def LD1 : WInst<"vld1", "dc",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def LD2 : WInst<"vld2", "2c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def LD3 : WInst<"vld3", "3c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def LD4 : WInst<"vld4", "4c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def ST1 : WInst<"vst1", "vpd",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def ST2 : WInst<"vst2", "vp2",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def ST3 : WInst<"vst3", "vp3",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
def ST4 : WInst<"vst4", "vp4",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
////////////////////////////////////////////////////////////////////////////////
// Addition
// With additional Qd type.

View File

@ -2345,6 +2345,40 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(F, Ops, "vcvt_n");
}
// Load/Store
case AArch64::BI__builtin_neon_vld1_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E);
case AArch64::BI__builtin_neon_vld1q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E);
case AArch64::BI__builtin_neon_vld2_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E);
case AArch64::BI__builtin_neon_vld2q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E);
case AArch64::BI__builtin_neon_vld3_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E);
case AArch64::BI__builtin_neon_vld3q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E);
case AArch64::BI__builtin_neon_vld4_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E);
case AArch64::BI__builtin_neon_vld4q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E);
case AArch64::BI__builtin_neon_vst1_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E);
case AArch64::BI__builtin_neon_vst1q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E);
case AArch64::BI__builtin_neon_vst2_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E);
case AArch64::BI__builtin_neon_vst2q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E);
case AArch64::BI__builtin_neon_vst3_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E);
case AArch64::BI__builtin_neon_vst3q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E);
case AArch64::BI__builtin_neon_vst4_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
case AArch64::BI__builtin_neon_vst4q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
// AArch64-only builtins
case AArch64::BI__builtin_neon_vfma_lane_v:
case AArch64::BI__builtin_neon_vfmaq_laneq_v: {

File diff suppressed because it is too large Load Diff