forked from OSchip/llvm-project
Implemented Neon scalar by element intrinsics.
Intrinsics implemented: the scalar forms of vqdmull_lane, vqdmulh_lane, vqrdmulh_lane, vqdmlal_lane and vqdmlsl_lane, together with their _laneq variants. llvm-svn: 195326
This commit is contained in:
parent
95f3e54066
commit
2b02688fd9
|
@ -119,6 +119,12 @@ def OP_SCALAR_MULX_LN : Op;
|
|||
def OP_SCALAR_MULX_LNQ : Op;
|
||||
def OP_SCALAR_VMULX_LN : Op;
|
||||
def OP_SCALAR_VMULX_LNQ : Op;
|
||||
def OP_SCALAR_QDMULL_LN : Op;
|
||||
def OP_SCALAR_QDMULL_LNQ : Op;
|
||||
def OP_SCALAR_QDMULH_LN : Op;
|
||||
def OP_SCALAR_QDMULH_LNQ : Op;
|
||||
def OP_SCALAR_QRDMULH_LN : Op;
|
||||
def OP_SCALAR_QRDMULH_LNQ : Op;
|
||||
|
||||
class Inst <string n, string p, string t, Op o> {
|
||||
string Name = n;
|
||||
|
@ -1244,4 +1250,25 @@ def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">;
|
|||
// Scalar Floating Point fused multiply-subtract (scalar, by element)
|
||||
def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>;
|
||||
def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>;
|
||||
|
||||
// Signed Saturating Doubling Multiply Long (scalar by element)
|
||||
def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>;
|
||||
def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LNQ>;
|
||||
|
||||
// Signed Saturating Doubling Multiply-Add Long (scalar by element)
|
||||
def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">;
|
||||
def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">;
|
||||
|
||||
// Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
|
||||
def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">;
|
||||
def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">;
|
||||
|
||||
// Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
|
||||
def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>;
|
||||
def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LNQ>;
|
||||
|
||||
// Scalar Integer Saturating Rounding Doubling Multiply Half High (scalar by element)
|
||||
def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
|
||||
def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LNQ>;
|
||||
|
||||
}
|
||||
|
|
|
@ -1772,13 +1772,52 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
|
|||
// argument that specifies the vector type, need to handle each case.
|
||||
switch (BuiltinID) {
|
||||
default: break;
|
||||
case AArch64::BI__builtin_neon_vqdmlalh_lane_s16 :
|
||||
case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16 :
|
||||
case AArch64::BI__builtin_neon_vqdmlals_lane_s32 :
|
||||
case AArch64::BI__builtin_neon_vqdmlals_laneq_s32 :
|
||||
case AArch64::BI__builtin_neon_vqdmlslh_lane_s16 :
|
||||
case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 :
|
||||
case AArch64::BI__builtin_neon_vqdmlsls_lane_s32 :
|
||||
case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32 : {
|
||||
Int = Intrinsic::arm_neon_vqadds;
|
||||
if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 ||
|
||||
BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 ||
|
||||
BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 ||
|
||||
BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) {
|
||||
Int = Intrinsic::arm_neon_vqsubs;
|
||||
}
|
||||
// create vqdmull call with b * c[i]
|
||||
llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
|
||||
llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
|
||||
Ty = CGF.ConvertType(E->getArg(0)->getType());
|
||||
llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
|
||||
Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
|
||||
Value *V = UndefValue::get(OpVTy);
|
||||
llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
|
||||
SmallVector<Value *, 2> MulOps;
|
||||
MulOps.push_back(Ops[1]);
|
||||
MulOps.push_back(Ops[2]);
|
||||
MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
|
||||
MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
|
||||
MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
|
||||
Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
|
||||
// create the vqadds call (vqsubs for the vqdmlsl builtins) computing a +/- the vqdmull result
|
||||
F = CGF.CGM.getIntrinsic(Int, ResVTy);
|
||||
SmallVector<Value *, 2> AddOps;
|
||||
AddOps.push_back(Ops[0]);
|
||||
AddOps.push_back(MulRes);
|
||||
V = UndefValue::get(ResVTy);
|
||||
AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
|
||||
Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
|
||||
return CGF.Builder.CreateBitCast(AddRes, Ty);
|
||||
}
|
||||
case AArch64::BI__builtin_neon_vfmas_lane_f32:
|
||||
case AArch64::BI__builtin_neon_vfmas_laneq_f32:
|
||||
case AArch64::BI__builtin_neon_vfmad_lane_f64:
|
||||
case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
|
||||
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
|
||||
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
|
||||
// extract lane v[i], then compute acc += x * v[i] with a single fma call
|
||||
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
|
||||
return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
|
||||
}
|
||||
|
@ -1857,26 +1896,26 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
|
|||
case AArch64::BI__builtin_neon_vqaddh_s16:
|
||||
case AArch64::BI__builtin_neon_vqadds_s32:
|
||||
case AArch64::BI__builtin_neon_vqaddd_s64:
|
||||
Int = Intrinsic::aarch64_neon_vqadds;
|
||||
Int = Intrinsic::arm_neon_vqadds;
|
||||
s = "vqadds"; OverloadInt = true; break;
|
||||
case AArch64::BI__builtin_neon_vqaddb_u8:
|
||||
case AArch64::BI__builtin_neon_vqaddh_u16:
|
||||
case AArch64::BI__builtin_neon_vqadds_u32:
|
||||
case AArch64::BI__builtin_neon_vqaddd_u64:
|
||||
Int = Intrinsic::aarch64_neon_vqaddu;
|
||||
Int = Intrinsic::arm_neon_vqaddu;
|
||||
s = "vqaddu"; OverloadInt = true; break;
|
||||
// Scalar Saturating Sub
|
||||
case AArch64::BI__builtin_neon_vqsubb_s8:
|
||||
case AArch64::BI__builtin_neon_vqsubh_s16:
|
||||
case AArch64::BI__builtin_neon_vqsubs_s32:
|
||||
case AArch64::BI__builtin_neon_vqsubd_s64:
|
||||
Int = Intrinsic::aarch64_neon_vqsubs;
|
||||
Int = Intrinsic::arm_neon_vqsubs;
|
||||
s = "vqsubs"; OverloadInt = true; break;
|
||||
case AArch64::BI__builtin_neon_vqsubb_u8:
|
||||
case AArch64::BI__builtin_neon_vqsubh_u16:
|
||||
case AArch64::BI__builtin_neon_vqsubs_u32:
|
||||
case AArch64::BI__builtin_neon_vqsubd_u64:
|
||||
Int = Intrinsic::aarch64_neon_vqsubu;
|
||||
Int = Intrinsic::arm_neon_vqsubu;
|
||||
s = "vqsubu"; OverloadInt = true; break;
|
||||
// Scalar Shift Left
|
||||
case AArch64::BI__builtin_neon_vshld_s64:
|
||||
|
@ -2270,7 +2309,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
|
|||
// Signed Saturating Doubling Multiply Long
|
||||
case AArch64::BI__builtin_neon_vqdmullh_s16:
|
||||
case AArch64::BI__builtin_neon_vqdmulls_s32:
|
||||
Int = Intrinsic::aarch64_neon_vqdmull;
|
||||
Int = Intrinsic::arm_neon_vqdmull;
|
||||
s = "vqdmull"; OverloadWideInt = true; break;
|
||||
// Scalar Signed Saturating Extract Unsigned Narrow
|
||||
case AArch64::BI__builtin_neon_vqmovunh_s16:
|
||||
|
|
|
@ -61,71 +61,195 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
|
|||
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vmulx_lane_f64
|
||||
// CHECK: test_vmulx_lane_f64
|
||||
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
|
||||
return vmulx_lane_f64(a, b, 0);
|
||||
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
|
||||
// CHECK_AARCH64: test_vmulx_laneq_f64_0
|
||||
// CHECK: test_vmulx_laneq_f64_0
|
||||
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
|
||||
return vmulx_laneq_f64(a, b, 0);
|
||||
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vmulx_laneq_f64_1
|
||||
// CHECK: test_vmulx_laneq_f64_1
|
||||
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
|
||||
return vmulx_laneq_f64(a, b, 1);
|
||||
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
}
|
||||
|
||||
|
||||
// CHECK_AARCH64: test_vfmas_lane_f32
|
||||
// CHECK: test_vfmas_lane_f32
|
||||
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
|
||||
return vfmas_lane_f32(a, b, c, 1);
|
||||
// CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfmad_lane_f64
|
||||
// CHECK: test_vfmad_lane_f64
|
||||
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
|
||||
return vfmad_lane_f64(a, b, c, 0);
|
||||
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfmad_laneq_f64
|
||||
// CHECK: test_vfmad_laneq_f64
|
||||
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
|
||||
return vfmad_laneq_f64(a, b, c, 1);
|
||||
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfmss_lane_f32
|
||||
// CHECK: test_vfmss_lane_f32
|
||||
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
|
||||
return vfmss_lane_f32(a, b, c, 1);
|
||||
// CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfma_lane_f64
|
||||
// CHECK: test_vfma_lane_f64
|
||||
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
|
||||
return vfma_lane_f64(a, b, v, 0);
|
||||
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfms_lane_f64
|
||||
// CHECK: test_vfms_lane_f64
|
||||
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
|
||||
return vfms_lane_f64(a, b, v, 0);
|
||||
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfma_laneq_f64
|
||||
// CHECK: test_vfma_laneq_f64
|
||||
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
|
||||
return vfma_laneq_f64(a, b, v, 0);
|
||||
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK_AARCH64: test_vfms_laneq_f64
|
||||
// CHECK: test_vfms_laneq_f64
|
||||
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
|
||||
return vfms_laneq_f64(a, b, v, 0);
|
||||
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmullh_lane_s16
|
||||
int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqdmullh_lane_s16(a, b, 3);
|
||||
// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmulls_lane_s32
|
||||
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqdmulls_lane_s32(a, b, 1);
|
||||
// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmullh_laneq_s16
|
||||
int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqdmullh_laneq_s16(a, b, 7);
|
||||
// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmulls_laneq_s32
|
||||
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqdmulls_laneq_s32(a, b, 3);
|
||||
// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmulhh_lane_s16
|
||||
int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqdmulhh_lane_s16(a, b, 3);
|
||||
// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmulhs_lane_s32
|
||||
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqdmulhs_lane_s32(a, b, 1);
|
||||
// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
|
||||
// CHECK: test_vqdmulhh_laneq_s16
|
||||
int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqdmulhh_laneq_s16(a, b, 7);
|
||||
// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
|
||||
}
|
||||
|
||||
|
||||
// CHECK: test_vqdmulhs_laneq_s32
|
||||
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqdmulhs_laneq_s32(a, b, 3);
|
||||
// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqrdmulhh_lane_s16
|
||||
int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqrdmulhh_lane_s16(a, b, 3);
|
||||
// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqrdmulhs_lane_s32
|
||||
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqrdmulhs_lane_s32(a, b, 1);
|
||||
// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
|
||||
// CHECK: test_vqrdmulhh_laneq_s16
|
||||
int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqrdmulhh_laneq_s16(a, b, 7);
|
||||
// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
|
||||
}
|
||||
|
||||
|
||||
// CHECK: test_vqrdmulhs_laneq_s32
|
||||
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqrdmulhs_laneq_s32(a, b, 3);
|
||||
// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlalh_lane_s16
|
||||
int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
|
||||
return vqdmlalh_lane_s16(a, b, c, 3);
|
||||
// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlals_lane_s32
|
||||
int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
|
||||
return vqdmlals_lane_s32(a, b, c, 1);
|
||||
// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlalh_laneq_s16
|
||||
int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
|
||||
return vqdmlalh_laneq_s16(a, b, c, 7);
|
||||
// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlals_laneq_s32
|
||||
int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
|
||||
return vqdmlals_laneq_s32(a, b, c, 3);
|
||||
// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlslh_lane_s16
|
||||
int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
|
||||
return vqdmlslh_lane_s16(a, b, c, 3);
|
||||
// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlsls_lane_s32
|
||||
int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
|
||||
return vqdmlsls_lane_s32(a, b, c, 1);
|
||||
// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlslh_laneq_s16
|
||||
int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
|
||||
return vqdmlslh_laneq_s16(a, b, c, 7);
|
||||
// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
|
||||
}
|
||||
|
||||
// CHECK: test_vqdmlsls_laneq_s32
|
||||
int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
|
||||
return vqdmlsls_laneq_s32(a, b, c, 3);
|
||||
// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
|
|
|
@ -140,7 +140,13 @@ enum OpKind {
|
|||
OpScalarMulXLane,
|
||||
OpScalarMulXLaneQ,
|
||||
OpScalarVMulXLane,
|
||||
OpScalarVMulXLaneQ
|
||||
OpScalarVMulXLaneQ,
|
||||
OpScalarQDMullLane,
|
||||
OpScalarQDMullLaneQ,
|
||||
OpScalarQDMulHiLane,
|
||||
OpScalarQDMulHiLaneQ,
|
||||
OpScalarQRDMulHiLane,
|
||||
OpScalarQRDMulHiLaneQ
|
||||
};
|
||||
|
||||
enum ClassKind {
|
||||
|
@ -307,6 +313,13 @@ public:
|
|||
OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
|
||||
OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
|
||||
OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
|
||||
OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
|
||||
OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
|
||||
OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
|
||||
OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
|
||||
OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
|
||||
OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
|
||||
|
||||
|
||||
Record *SI = R.getClass("SInst");
|
||||
Record *II = R.getClass("IInst");
|
||||
|
@ -2033,8 +2046,8 @@ static std::string GenOpString(const std::string &name, OpKind op,
|
|||
case OpScalarMulLane: {
|
||||
std::string typeCode = "";
|
||||
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
||||
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
|
||||
"(__b, __c);\\\n __a * __d1;";
|
||||
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
|
||||
"(__b, __c);\\\n __a * __d1;";
|
||||
break;
|
||||
}
|
||||
case OpScalarMulLaneQ: {
|
||||
|
@ -2100,7 +2113,48 @@ static std::string GenOpString(const std::string &name, OpKind op,
|
|||
" vset_lane_" + typeCode + "(__f1, __g1, 0);";
|
||||
break;
|
||||
}
|
||||
|
||||
case OpScalarQDMullLane: {
  // Scalar-by-element saturating doubling multiply long: emit a call to the
  // name MangleName produces for "vqdmull", passing the scalar __a and the
  // lane __c selected from the vector operand through vget_lane_<typeCode>.
  std::string typeCode = "";
  InstructionTypeCode(typestr, ClassS, quad, typeCode);
  // Read the lane from __b, exactly as the sibling OpScalarQDMulHiLane and
  // OpScalarQRDMulHiLane cases do, instead of from a bare `b`.
  s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
       "vget_lane_" + typeCode + "(__b, __c));";
  break;
}
|
||||
case OpScalarQDMullLaneQ: {
  // Same as the vget_lane_ form of this operation, but the lane __c is
  // selected through vgetq_lane_<typeCode>.
  std::string typeCode = "";
  InstructionTypeCode(typestr, ClassS, quad, typeCode);
  // Read the lane from __b, exactly as the sibling OpScalarQDMulHiLaneQ and
  // OpScalarQRDMulHiLaneQ cases do, instead of from a bare `b`.
  s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
       "vgetq_lane_" + typeCode + "(__b, __c));";
  break;
}
|
||||
case OpScalarQDMulHiLane: {
|
||||
std::string typeCode = "";
|
||||
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
||||
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
|
||||
"vget_lane_" + typeCode + "(__b, __c));";
|
||||
break;
|
||||
}
|
||||
case OpScalarQDMulHiLaneQ: {
|
||||
std::string typeCode = "";
|
||||
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
||||
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
|
||||
"vgetq_lane_" + typeCode + "(__b, __c));";
|
||||
break;
|
||||
}
|
||||
case OpScalarQRDMulHiLane: {
|
||||
std::string typeCode = "";
|
||||
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
||||
s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
|
||||
"vget_lane_" + typeCode + "(__b, __c));";
|
||||
break;
|
||||
}
|
||||
case OpScalarQRDMulHiLaneQ: {
|
||||
std::string typeCode = "";
|
||||
InstructionTypeCode(typestr, ClassS, quad, typeCode);
|
||||
s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
|
||||
"vgetq_lane_" + typeCode + "(__b, __c));";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PrintFatalError("unknown OpKind!");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue