Implement the AArch64 NEON instruction classes SIMD lsone and SIMD lsone-post (load/store of a single structure, with and without post-indexing).

llvm-svn: 195079
Hao Liu 2013-11-19 02:17:31 +00:00
parent 16edc4675c
commit 171cedf61e
3 changed files with 2202 additions and 1 deletion
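In user terms these are the NEON single-structure (per-lane) and replicate (dup) load/store intrinsics. A minimal illustrative sketch, not part of the commit, with hypothetical function names:

#include <arm_neon.h>

// Illustrative only: the kinds of intrinsics whose AArch64 code generation
// this commit wires up. Lane indices must be compile-time constants.
float32x4_t lane_and_dup_demo(const float *p, float *q, float32x4_t v) {
  v = vld1q_lane_f32(p, v, 3);        // load one float into lane 3 (LD1 {lane})
  float32x2x2_t d = vld2_dup_f32(p);  // broadcast p[0] and p[1] across two vectors (LD2R)
  vst1_lane_f32(q, d.val[0], 1);      // store one lane back to memory (ST1 {lane})
  return v;
}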


@@ -526,7 +526,7 @@ let isA64 = 1 in {
////////////////////////////////////////////////////////////////////////////////
// Load/Store
// With additional QUl, Ql, Qd, Pl, QPl type.
// With additional QUl, Ql, d, Qd, Pl, QPl type.
def LD1 : WInst<"vld1", "dc",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD2 : WInst<"vld2", "2c",
@@ -558,6 +558,33 @@ def ST1_X3 : WInst<"vst1_x3", "vp3",
def ST1_X4 : WInst<"vst1_x4", "vp4",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
// With additional QUl, Ql, d, Qd, Pl, QPl type.
def LD1_LANE : WInst<"vld1_lane", "dcdi",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD2_LANE : WInst<"vld2_lane", "2c2i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD3_LANE : WInst<"vld3_lane", "3c3i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD4_LANE : WInst<"vld4_lane", "4c4i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST1_LANE : WInst<"vst1_lane", "vpdi",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST2_LANE : WInst<"vst2_lane", "vp2i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST3_LANE : WInst<"vst3_lane", "vp3i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def ST4_LANE : WInst<"vst4_lane", "vp4i",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD1_DUP : WInst<"vld1_dup", "dc",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD2_DUP : WInst<"vld2_dup", "2c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD3_DUP : WInst<"vld3_dup", "3c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
def LD4_DUP : WInst<"vld4_dup", "4c",
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
////////////////////////////////////////////////////////////////////////////////
// Addition
// With additional Qd type.
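The WInst records added above feed clang's NeonEmitter, which generates the arm_neon.h declarations. As a rough sketch, assuming the usual type-letter mapping (Ql -> int64x2_t, Qd -> float64x2_t, QPl -> poly64x2_t), the LD2_LANE and LD2_DUP entries yield prototypes along these lines (the real header emits static inline definitions):

// Assumed shapes of the generated declarations for the newly allowed 64-bit
// element types; illustrative only.
int64x2x2_t   vld2q_lane_s64(const int64_t *ptr, int64x2x2_t src, const int lane);
poly64x2x2_t  vld2q_lane_p64(const poly64_t *ptr, poly64x2x2_t src, const int lane);
float64x2x2_t vld2q_dup_f64(const float64_t *ptr);
int64x1x2_t   vld2_dup_s64(const int64_t *ptr);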


@@ -2760,6 +2760,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case AArch64::BI__builtin_neon_vst1q_x3_v:
case AArch64::BI__builtin_neon_vst1_x4_v:
case AArch64::BI__builtin_neon_vst1q_x4_v:
// Handle ld1/st1 lane in this function a little differently from ARM.
case AArch64::BI__builtin_neon_vld1_lane_v:
case AArch64::BI__builtin_neon_vld1q_lane_v:
case AArch64::BI__builtin_neon_vst1_lane_v:
case AArch64::BI__builtin_neon_vst1q_lane_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value *, unsigned> Src =
@@ -2777,6 +2782,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case AArch64::BI__builtin_neon_vld1q_x3_v:
case AArch64::BI__builtin_neon_vld1_x4_v:
case AArch64::BI__builtin_neon_vld1q_x4_v:
// Handle ld/st dup and lane in this function a little differently from ARM.
case AArch64::BI__builtin_neon_vld2_dup_v:
case AArch64::BI__builtin_neon_vld2q_dup_v:
case AArch64::BI__builtin_neon_vld3_dup_v:
case AArch64::BI__builtin_neon_vld3q_dup_v:
case AArch64::BI__builtin_neon_vld4_dup_v:
case AArch64::BI__builtin_neon_vld4q_dup_v:
case AArch64::BI__builtin_neon_vld2_lane_v:
case AArch64::BI__builtin_neon_vld2q_lane_v:
// Get the alignment for the argument in addition to the value;
// we'll use it later.
std::pair<llvm::Value *, unsigned> Src =
@@ -3170,6 +3184,119 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
}
case AArch64::BI__builtin_neon_vld1_lane_v:
case AArch64::BI__builtin_neon_vld1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
LoadInst *Ld = Builder.CreateLoad(Ops[0]);
Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
}
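The vld1 lane path above amounts to a scalar load followed by an insertelement at the requested lane. Illustrative source-level example (hypothetical function name):

#include <arm_neon.h>

int32x4_t load_into_lane2(const int32_t *p, int32x4_t v) {
  // Emits a scalar i32 load plus an insertelement into lane 2.
  return vld1q_lane_s32(p, v, 2);
}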
case AArch64::BI__builtin_neon_vld2_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E);
case AArch64::BI__builtin_neon_vld2q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E);
case AArch64::BI__builtin_neon_vld3_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E);
case AArch64::BI__builtin_neon_vld3q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E);
case AArch64::BI__builtin_neon_vld4_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E);
case AArch64::BI__builtin_neon_vld4q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E);
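The multi-vector lane loads are delegated to the ARM builtin handling, which resolves the type-overloaded llvm.arm.neon.vldNlane intrinsic from the call's NEON type, so the Q-register and 64-bit element forms reuse the same lowering. Illustrative example:

#include <arm_neon.h>

int64x2x2_t load_pair_lane0(const int64_t *p, int64x2x2_t acc) {
  // De-interleaving load of p[0] and p[1] into lane 0 of acc.val[0] and acc.val[1].
  return vld2q_lane_s64(p, acc, 0);
}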
case AArch64::BI__builtin_neon_vst1_lane_v:
case AArch64::BI__builtin_neon_vst1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
StoreInst *St =
Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
return St;
}
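Conversely, the vst1 lane path extracts one element and stores it as a scalar. Illustrative example:

#include <arm_neon.h>

void store_lane5(uint8_t *p, uint8x16_t v) {
  // Emits an extractelement of lane 5 plus a scalar i8 store.
  vst1q_lane_u8(p, v, 5);
}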
case AArch64::BI__builtin_neon_vst2_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E);
case AArch64::BI__builtin_neon_vst2q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E);
case AArch64::BI__builtin_neon_vst3_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E);
case AArch64::BI__builtin_neon_vst3q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E);
case AArch64::BI__builtin_neon_vst4_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E);
case AArch64::BI__builtin_neon_vst4q_lane_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E);
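The multi-vector lane stores likewise reuse the ARM lowering (llvm.arm.neon.vstNlane). Illustrative example:

#include <arm_neon.h>

void store_quad_lane1(float *p, float32x4x4_t v) {
  // Stores lane 1 of each of the four vectors to p[0..3], interleaved.
  vst4q_lane_f32(p, v, 1);
}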
case AArch64::BI__builtin_neon_vld1_dup_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E);
case AArch64::BI__builtin_neon_vld1q_dup_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E);
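vld1_dup also defers to the ARM path, which emits a scalar load, an insert into lane 0, and a splat. Illustrative example:

#include <arm_neon.h>

float32x4_t broadcast(const float *p) {
  // Loads p[0] once and splats it across all four lanes (the LD1R form).
  return vld1q_dup_f32(p);
}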
case AArch64::BI__builtin_neon_vld2_dup_v:
case AArch64::BI__builtin_neon_vld2q_dup_v:
case AArch64::BI__builtin_neon_vld3_dup_v:
case AArch64::BI__builtin_neon_vld3q_dup_v:
case AArch64::BI__builtin_neon_vld4_dup_v:
case AArch64::BI__builtin_neon_vld4q_dup_v: {
// Handle 64-bit x 1 elements as a special-case. There is no "dup" needed.
if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
VTy->getNumElements() == 1) {
switch (BuiltinID) {
case AArch64::BI__builtin_neon_vld2_dup_v:
Int = Intrinsic::arm_neon_vld2;
break;
case AArch64::BI__builtin_neon_vld3_dup_v:
Int = Intrinsic::arm_neon_vld3;
break;
case AArch64::BI__builtin_neon_vld4_dup_v:
Int = Intrinsic::arm_neon_vld4;
break;
default:
llvm_unreachable("unknown vld_dup intrinsic?");
}
Function *F = CGM.getIntrinsic(Int, Ty);
Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
switch (BuiltinID) {
case AArch64::BI__builtin_neon_vld2_dup_v:
case AArch64::BI__builtin_neon_vld2q_dup_v:
Int = Intrinsic::arm_neon_vld2lane;
break;
case AArch64::BI__builtin_neon_vld3_dup_v:
case AArch64::BI__builtin_neon_vld3q_dup_v:
Int = Intrinsic::arm_neon_vld3lane;
break;
case AArch64::BI__builtin_neon_vld4_dup_v:
case AArch64::BI__builtin_neon_vld4q_dup_v:
Int = Intrinsic::arm_neon_vld4lane;
break;
}
Function *F = CGM.getIntrinsic(Int, Ty);
llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
SmallVector<Value *, 6> Args;
Args.push_back(Ops[1]);
Args.append(STy->getNumElements(), UndefValue::get(Ty));
llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
Args.push_back(CI);
Args.push_back(Align);
Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
// Splat lane 0 to all elements in each vector of the result.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Value *Val = Builder.CreateExtractValue(Ops[1], i);
Value *Elt = Builder.CreateBitCast(Val, Ty);
Elt = EmitNeonSplat(Elt, CI);
Elt = Builder.CreateBitCast(Elt, Val->getType());
Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
}
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
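Summarizing the vldN_dup lowering above: one-element 64-bit vectors need no splat and get a plain llvm.arm.neon.vldN, while every other shape loads lane 0 of undef vectors through llvm.arm.neon.vldNlane and then splats lane 0 across each result vector. Illustrative examples of both shapes (hypothetical function names):

#include <arm_neon.h>

// General case: four floats loaded, each broadcast across a 2-lane vector
// (lane-0 load followed by a per-vector splat).
float32x2x4_t dup_quad(const float *p) {
  return vld4_dup_f32(p);
}

// Special case from the code above: 64-bit x 1 vectors, where the "dup" is a
// no-op and a plain vld2 is emitted.
int64x1x2_t dup_pair64(const int64_t *p) {
  return vld2_dup_s64(p);
}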
// Crypto
case AArch64::BI__builtin_neon_vaeseq_v:

File diff suppressed because it is too large.