[NEON] Support VST1xN intrinsics in AArch32 mode (Clang part)

We currently support them only in AArch64. The NEON Reference, however,
says they are 'ARMv7, ARMv8' intrinsics.

Differential Revision: https://reviews.llvm.org/D47446

llvm-svn: 334362
commit 73c76c35a5
parent 847daa11f8
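For illustration, a minimal use of one of the intrinsics this enables
(hypothetical example, not part of the commit); after this change the same
source compiles when targeting AArch32 as well as AArch64:

    #include <arm_neon.h>

    // Store two q-register vectors to consecutive memory in one call.
    void store_pair(float32_t *dst, float32x4x2_t v) {
      vst1q_f32_x2(dst, v);  // previously rejected in AArch32 mode
    }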
clang/include/clang/Basic/arm_neon.td
@@ -350,6 +350,12 @@ def VLD1_DUP  : WInst<"vld1_dup", "dc",
                       "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
 def VST1      : WInst<"vst1", "vpd",
                       "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
+def VST1_X2   : WInst<"vst1_x2", "vp2",
+                      "cfhilsUcUiUlUsQcQfQhQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
+def VST1_X3   : WInst<"vst1_x3", "vp3",
+                      "cfhilsUcUiUlUsQcQfQhQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
+def VST1_X4   : WInst<"vst1_x4", "vp4",
+                      "cfhilsUcUiUlUsQcQfQhQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VST1_LANE : WInst<"vst1_lane", "vpdi",
                       "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
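Rough sketch (assuming the usual arm_neon.h emission from these WInst
records; exact attributes differ): the new defs expand to AArch32
declarations such as

    void vst1_f32_x2(float32_t *ptr, float32x2x2_t val);
    void vst1q_f32_x3(float32_t *ptr, float32x4x3_t val);

In the prototype strings, 'v' is a void return, 'p' a pointer to the scalar
type, and '2'/'3'/'4' a structure of two, three or four vectors.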
@@ -581,12 +587,9 @@ def LD1_X3 : WInst<"vld1_x3", "3c",
 def LD1_X4 : WInst<"vld1_x4", "4c",
                    "dQdPlQPl">;
 
-def ST1_X2 : WInst<"vst1_x2", "vp2",
-                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
-def ST1_X3 : WInst<"vst1_x3", "vp3",
-                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
-def ST1_X4 : WInst<"vst1_x4", "vp4",
-                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
+def ST1_X2 : WInst<"vst1_x2", "vp2", "dQdPlQPl">;
+def ST1_X3 : WInst<"vst1_x3", "vp3", "dQdPlQPl">;
+def ST1_X4 : WInst<"vst1_x4", "vp4", "dQdPlQPl">;
 
 def LD1_LANE  : WInst<"vld1_lane", "dcdi", "dQdPlQPl">;
 def LD2_LANE  : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">;
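Net effect in the .td file: the AArch32-capable element types move to the
common VST1_XN records above, while the AArch64-only records keep just the
float64 and poly64 element types ("dQdPlQPl"). So, as a hypothetical
example,

    void f(float64_t *p, float64x2x2_t v) { vst1q_f64_x2(p, v); }

remains valid only when targeting AArch64.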
clang/lib/CodeGen/CGBuiltin.cpp
@@ -4181,7 +4181,13 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
   NEONMAP0(vshrn_n_v),
   NEONMAP0(vshrq_n_v),
   NEONMAP1(vst1_v, arm_neon_vst1, 0),
+  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
+  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
+  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
+  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
+  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
   NEONMAP1(vst2_v, arm_neon_vst2, 0),
   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
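For context, NEONMAP1 is (roughly, as defined earlier in CGBuiltin.cpp) a
table-entry macro pairing a builtin with its target intrinsic:

    #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
      { #NameBase, NEON::BI__builtin_neon_##NameBase, \
        Intrinsic::LLVMIntrinsic, 0, TypeModifier }

so each added row routes a vst1xN builtin to the corresponding
llvm.arm.neon.vst1xN intrinsic.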
@@ -4341,6 +4347,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP0(vshr_n_v),
   NEONMAP0(vshrn_n_v),
   NEONMAP0(vshrq_n_v),
+  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
+  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
+  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
+  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
+  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
+  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
   NEONMAP0(vsubhn_v),
   NEONMAP0(vtst_v),
   NEONMAP0(vtstq_v),
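Adding the same six builtins to the AArch64 map lets them reach the shared
EmitCommonNeonBuiltinExpr path below, where the map's LLVMIntrinsic field
selects llvm.aarch64.neon.st1xN instead of the AArch32 llvm.arm.neon.vst1xN.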
@@ -5116,6 +5128,23 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     Ops.push_back(getAlignmentValue32(PtrOp0));
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
   }
+  case NEON::BI__builtin_neon_vst1_x2_v:
+  case NEON::BI__builtin_neon_vst1q_x2_v:
+  case NEON::BI__builtin_neon_vst1_x3_v:
+  case NEON::BI__builtin_neon_vst1q_x3_v:
+  case NEON::BI__builtin_neon_vst1_x4_v:
+  case NEON::BI__builtin_neon_vst1q_x4_v: {
+    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
+    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
+    // in AArch64 it comes last. We may want to stick to one or another.
+    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+      llvm::Type *Tys[2] = { VTy, PTy };
+      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
+      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+    }
+    llvm::Type *Tys[2] = { PTy, VTy };
+    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
+  }
   case NEON::BI__builtin_neon_vsubhn_v: {
     llvm::VectorType *SrcTy =
         llvm::VectorType::getExtendedElementVectorType(VTy);
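The std::rotate call moves the pointer operand from the front of Ops to the
back for AArch64, since (per the TODO above) the aarch64.neon.st1xN
intrinsics take the address last while the arm.neon.vst1xN intrinsics take
it first. A minimal standalone illustration of that reordering (not part of
the commit):

    #include <algorithm>
    #include <cassert>
    #include <cstring>
    #include <vector>

    int main() {
      // Operands as collected for the builtin: pointer first (AArch32 order).
      std::vector<const char *> Ops = {"ptr", "vec0", "vec1"};
      // Rotate left by one element so the pointer lands last (AArch64 order).
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      assert(std::strcmp(Ops.back(), "ptr") == 0);
    }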
@@ -8075,34 +8104,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
     return Builder.CreateAdd(Ops[0], tmp);
   }
-  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
-  // of an Align parameter here.
-  case NEON::BI__builtin_neon_vst1_x2_v:
-  case NEON::BI__builtin_neon_vst1q_x2_v:
-  case NEON::BI__builtin_neon_vst1_x3_v:
-  case NEON::BI__builtin_neon_vst1q_x3_v:
-  case NEON::BI__builtin_neon_vst1_x4_v:
-  case NEON::BI__builtin_neon_vst1q_x4_v: {
-    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
-    llvm::Type *Tys[2] = { VTy, PTy };
-    unsigned Int;
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vst1_x2_v:
-    case NEON::BI__builtin_neon_vst1q_x2_v:
-      Int = Intrinsic::aarch64_neon_st1x2;
-      break;
-    case NEON::BI__builtin_neon_vst1_x3_v:
-    case NEON::BI__builtin_neon_vst1q_x3_v:
-      Int = Intrinsic::aarch64_neon_st1x3;
-      break;
-    case NEON::BI__builtin_neon_vst1_x4_v:
-    case NEON::BI__builtin_neon_vst1q_x4_v:
-      Int = Intrinsic::aarch64_neon_st1x4;
-      break;
-    }
-    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
-    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
-  }
   case NEON::BI__builtin_neon_vld1_v:
   case NEON::BI__builtin_neon_vld1q_v: {
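With this block deleted, the AArch64 vst1xN builtins no longer have a
bespoke lowering: they hit the AArch64SIMDIntrinsicMap entries added above
and flow through the shared EmitCommonNeonBuiltinExpr case instead,
removing the duplicated switch.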
(Two additional file diffs in this commit are suppressed because they are too large.)