forked from OSchip/llvm-project
[ARM] Enabling range checks on Neon intrinsics' lane arguments
Summary: Range checks were not properly performed in the lane arguments of Neon intrinsics implemented based on splat operations. Calls to those intrinsics where translated to `__builtin__shufflevector` calls directly by the pre-processor through the arm_neon.h macros, missing the chance for the proper range checks. This patch enables the range check by introducing an auxiliary splat instruction in arm_neon.td, delaying the translation to shufflevector calls to CGBuiltin.cpp in clang after the checks were performed. Reviewers: jmolloy, t.p.northover, rsmith, olista01, ostannard Reviewed By: ostannard Subscribers: ostannard, dnsampaio, danielkiss, kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D74619
This commit is contained in:
parent
d42711625a
commit
f56550cf7f
|
@ -51,39 +51,39 @@ def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>;
|
|||
def OP_FMLS_N : Op<(call "vfma", $p0, (op "-", $p1), (dup $p2))>;
|
||||
def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
|
||||
def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
|
||||
def OP_MUL_LN : Op<(op "*", $p0, (splat $p1, $p2))>;
|
||||
def OP_MULX_LN : Op<(call "vmulx", $p0, (splat $p1, $p2))>;
|
||||
def OP_MUL_LN : Op<(op "*", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_MULX_LN : Op<(call "vmulx", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_MULL_N : Op<(call "vmull", $p0, (dup $p1))>;
|
||||
def OP_MULL_LN : Op<(call "vmull", $p0, (splat $p1, $p2))>;
|
||||
def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (splat $p1, $p2))>;
|
||||
def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (splat $p2, $p3)))>;
|
||||
def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (splat $p2, $p3)))>;
|
||||
def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
|
||||
def OP_MULL_LN : Op<(call "vmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_MLALHi_LN: Op<(op "+", $p0, (call "vmull", (call "vget_high", $p1),
|
||||
(splat $p2, $p3)))>;
|
||||
def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
|
||||
(call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_MLSLHi_LN : Op<(op "-", $p0, (call "vmull", (call "vget_high", $p1),
|
||||
(splat $p2, $p3)))>;
|
||||
(call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_QDMULL_N : Op<(call "vqdmull", $p0, (dup $p1))>;
|
||||
def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (splat $p1, $p2))>;
|
||||
def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_QDMULLHi_LN : Op<(call "vqdmull", (call "vget_high", $p0),
|
||||
(splat $p1, $p2))>;
|
||||
(call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_QDMLAL_N : Op<(call "vqdmlal", $p0, $p1, (dup $p2))>;
|
||||
def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (splat $p2, $p3))>;
|
||||
def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
|
||||
def OP_QDMLALHi_LN : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
|
||||
(splat $p2, $p3))>;
|
||||
(call_mangled "splat_lane", $p2, $p3))>;
|
||||
def OP_QDMLSL_N : Op<(call "vqdmlsl", $p0, $p1, (dup $p2))>;
|
||||
def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (splat $p2, $p3))>;
|
||||
def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
|
||||
def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
|
||||
(splat $p2, $p3))>;
|
||||
(call_mangled "splat_lane", $p2, $p3))>;
|
||||
def OP_QDMULH_N : Op<(call "vqdmulh", $p0, (dup $p1))>;
|
||||
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
|
||||
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
|
||||
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
|
||||
def OP_QRDMULH_N : Op<(call "vqrdmulh", $p0, (dup $p1))>;
|
||||
def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
|
||||
def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
|
||||
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
|
||||
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
|
||||
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
|
||||
def OP_FMS_LN : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
|
||||
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
|
||||
def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
|
||||
|
@ -115,7 +115,7 @@ def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
|
|||
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
|
||||
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
|
||||
def OP_DUP : Op<(dup $p0)>;
|
||||
def OP_DUP_LN : Op<(splat $p0, $p1)>;
|
||||
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
|
||||
def OP_SEL : Op<(cast "R", (op "|",
|
||||
(op "&", $p0, (cast $p0, $p1)),
|
||||
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
|
||||
|
@ -207,10 +207,10 @@ def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t",
|
|||
|
||||
def OP_DOT_LN
|
||||
: Op<(call "vdot", $p0, $p1,
|
||||
(bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>;
|
||||
(bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
|
||||
def OP_DOT_LNQ
|
||||
: Op<(call "vdot", $p0, $p1,
|
||||
(bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>;
|
||||
(bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
|
||||
|
||||
def OP_FMLAL_LN : Op<(call "vfmlal_low", $p0, $p1,
|
||||
(dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
|
||||
|
@ -222,7 +222,19 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
|
|||
(dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instructions
|
||||
// Auxiliary Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Splat operation - performs a range-checked splat over a vector
|
||||
def SPLAT : WInst<"splat_lane", ".(!q)I",
|
||||
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
|
||||
def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
|
||||
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
|
||||
let isLaneQ = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -88,6 +88,7 @@ def call_mangled;
|
|||
// - "D" - Double the number of lanes in the type.
|
||||
// - "8" - Convert type to an equivalent vector of 8-bit signed
|
||||
// integers.
|
||||
// - "32" - Convert type to an equivalent vector of 32-bit integers.
|
||||
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
|
||||
// value is of type "int32x4_t".
|
||||
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
|
||||
|
@ -109,12 +110,6 @@ def dup;
|
|||
// example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}"
|
||||
// (assuming __p1 is float16x4_t, and __p2 is a compatible scalar).
|
||||
def dup_typed;
|
||||
// splat - Take a vector and a lane index, and return a vector of the same type
|
||||
// containing repeated instances of the source vector at the lane index.
|
||||
// example: (splat $p0, $p1) ->
|
||||
// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
|
||||
// (assuming __p0 has four elements).
|
||||
def splat;
|
||||
// save_temp - Create a temporary (local) variable. The variable takes a name
|
||||
// based on the zero'th parameter and can be referenced using
|
||||
// using that name in subsequent DAGs in the same
|
||||
|
|
|
@ -4495,10 +4495,15 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
|
|||
}
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
|
||||
const ElementCount &Count) {
|
||||
Value *SV = llvm::ConstantVector::getSplat(Count, C);
|
||||
return Builder.CreateShuffleVector(V, V, SV, "lane");
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
|
||||
ElementCount EC = V->getType()->getVectorElementCount();
|
||||
Value *SV = llvm::ConstantVector::getSplat(EC, C);
|
||||
return Builder.CreateShuffleVector(V, V, SV, "lane");
|
||||
return EmitNeonSplat(V, C, EC);
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
|
||||
|
@ -4605,6 +4610,10 @@ struct ARMVectorIntrinsicInfo {
|
|||
TypeModifier }
|
||||
|
||||
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
|
||||
NEONMAP0(splat_lane_v),
|
||||
NEONMAP0(splat_laneq_v),
|
||||
NEONMAP0(splatq_lane_v),
|
||||
NEONMAP0(splatq_laneq_v),
|
||||
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
|
||||
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
|
||||
NEONMAP1(vabs_v, arm_neon_vabs, 0),
|
||||
|
@ -4886,6 +4895,10 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
|
|||
};
|
||||
|
||||
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
|
||||
NEONMAP0(splat_lane_v),
|
||||
NEONMAP0(splat_laneq_v),
|
||||
NEONMAP0(splatq_lane_v),
|
||||
NEONMAP0(splatq_laneq_v),
|
||||
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
|
||||
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
|
||||
NEONMAP0(vaddhn_v),
|
||||
|
@ -5460,6 +5473,19 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|||
|
||||
switch (BuiltinID) {
|
||||
default: break;
|
||||
case NEON::BI__builtin_neon_splat_lane_v:
|
||||
case NEON::BI__builtin_neon_splat_laneq_v:
|
||||
case NEON::BI__builtin_neon_splatq_lane_v:
|
||||
case NEON::BI__builtin_neon_splatq_laneq_v: {
|
||||
auto NumElements = VTy->getElementCount();
|
||||
if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
|
||||
NumElements = NumElements * 2;
|
||||
if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
|
||||
NumElements = NumElements / 2;
|
||||
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
|
||||
return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
|
||||
}
|
||||
case NEON::BI__builtin_neon_vpadd_v:
|
||||
case NEON::BI__builtin_neon_vpaddq_v:
|
||||
// We don't allow fp/int overloading of intrinsics.
|
||||
|
|
|
@ -3894,6 +3894,8 @@ public:
|
|||
SmallVectorImpl<llvm::Value*> &O,
|
||||
const char *name,
|
||||
unsigned shift = 0, bool rightshift = false);
|
||||
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
|
||||
const llvm::ElementCount &Count);
|
||||
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
|
||||
llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
|
||||
bool negateForRightShift);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -69,144 +69,177 @@ float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
|
||||
return vmla_lane_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
|
||||
return vmlaq_lane_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
|
||||
return vmla_laneq_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
|
||||
return vmlaq_laneq_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
|
||||
return vmls_lane_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
|
||||
return vmlsq_lane_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
|
||||
return vmls_laneq_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
|
||||
return vmlsq_laneq_f32(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
|
||||
return vmla_lane_f32(a, b, v, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
|
||||
return vmlaq_lane_f32(a, b, v, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[ADD]]
|
||||
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
|
||||
return vmla_laneq_f32(a, b, v, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[ADD]]
|
||||
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
|
||||
return vmlaq_laneq_f32(a, b, v, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
|
||||
return vmls_lane_f32(a, b, v, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
//
|
||||
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
|
||||
return vmlsq_lane_f32(a, b, v, 1);
|
||||
}
|
||||
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <2 x float> [[SUB]]
|
||||
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
|
||||
return vmls_laneq_f32(a, b, v, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
|
||||
// CHECK: ret <4 x float> [[SUB]]
|
||||
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
|
||||
return vmlsq_laneq_f32(a, b, v, 3);
|
||||
}
|
||||
|
|
|
@ -150,22 +150,28 @@ poly64x2_t test_vmovq_n_p64(poly64_t a) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
|
||||
// CHECK: ret <1 x i64> [[SHUFFLE]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
|
||||
// CHECK: ret <1 x i64> [[LANE]]
|
||||
poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
|
||||
return vdup_lane_p64(vec, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
|
||||
// CHECK: ret <2 x i64> [[SHUFFLE]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: ret <2 x i64> [[LANE]]
|
||||
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
|
||||
return vdupq_lane_p64(vec, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: ret <2 x i64> [[SHUFFLE]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: ret <2 x i64> [[LANE]]
|
||||
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
|
||||
return vdupq_laneq_p64(vec, 1);
|
||||
}
|
||||
|
|
|
@ -1086,32 +1086,40 @@ float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: test_vmul_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <4 x half> [[MUL]]
|
||||
float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
|
||||
return vmul_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulq_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <8 x half> [[MUL]]
|
||||
float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
|
||||
return vmulq_lane_f16(a, b, 7);
|
||||
return vmulq_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmul_laneq_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <4 x half> [[MUL]]
|
||||
float16x4_t test_vmul_laneq_f16(float16x4_t a, float16x8_t b) {
|
||||
return vmul_laneq_f16(a, b, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulq_laneq_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <8 x half> [[MUL]]
|
||||
float16x8_t test_vmulq_laneq_f16(float16x8_t a, float16x8_t b) {
|
||||
return vmulq_laneq_f16(a, b, 7);
|
||||
|
@ -1165,33 +1173,49 @@ float16_t test_vmulh_laneq_f16(float16_t a, float16x8_t b) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulx_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
|
||||
// CHECK: ret <4 x half> [[MUL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
|
||||
// CHECK: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]]) #4
|
||||
// CHECK: ret <4 x half> [[VMULX2_I]]
|
||||
float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
|
||||
return vmulx_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulxq_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
|
||||
// CHECK: ret <8 x half> [[MUL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
|
||||
// CHECK: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]]) #4
|
||||
// CHECK: ret <8 x half> [[VMULX2_I]]
|
||||
float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
|
||||
return vmulxq_lane_f16(a, b, 7);
|
||||
return vmulxq_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulx_laneq_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
|
||||
// CHECK: ret <4 x half> [[MUL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
|
||||
// CHECK: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]]) #4
|
||||
// CHECK: ret <4 x half> [[VMULX2_I]]
|
||||
float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
|
||||
return vmulx_laneq_f16(a, b, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulxq_laneq_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
|
||||
// CHECK: ret <8 x half> [[MUL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
|
||||
// CHECK: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]]) #4
|
||||
// CHECK: ret <8 x half> [[VMULX2_I]]
|
||||
float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
|
||||
return vmulxq_laneq_f16(a, b, 7);
|
||||
}
|
||||
|
@ -1473,17 +1497,21 @@ float16x8_t test_vdupq_n_f16(float16_t a) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: test_vdup_lane_f16
|
||||
// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <4 x half> [[SHFL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <4 x half> [[LANE]]
|
||||
float16x4_t test_vdup_lane_f16(float16x4_t a) {
|
||||
return vdup_lane_f16(a, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vdupq_lane_f16
|
||||
// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: ret <8 x half> [[SHFL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <8 x half> [[LANE]]
|
||||
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
|
||||
return vdupq_lane_f16(a, 7);
|
||||
return vdupq_lane_f16(a, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vext_f16(
|
||||
|
|
|
@ -0,0 +1,410 @@
|
|||
// RUN: %clang_cc1 -triple arm64-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
|
||||
// RUN: %clang_cc1 -triple armv8.1a-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void test_vdot_lane(int32x2_t r, int8x8_t a, int8x8_t b) {
|
||||
vdot_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vdot_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vdot_lane_s32(r, a, b, 0);
|
||||
vdot_lane_s32(r, a, b, 1);
|
||||
}
|
||||
|
||||
void test_vdotq_lane(int32x4_t r, int8x16_t a, int8x8_t b) {
|
||||
vdotq_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vdotq_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vdotq_lane_s32(r, a, b, 0);
|
||||
vdotq_lane_s32(r, a, b, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vdot_laneq(int32x2_t r, int8x8_t a, int8x16_t b) {
|
||||
vdot_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vdot_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vdot_laneq_s32(r, a, b, 0);
|
||||
vdot_laneq_s32(r, a, b, 3);
|
||||
}
|
||||
|
||||
void test_vdotq_laneq(int32x4_t r, int8x16_t a, int8x16_t b) {
|
||||
vdotq_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vdotq_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vdotq_laneq_s32(r, a, b, 0);
|
||||
vdotq_laneq_s32(r, a, b, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vdup_lane(int32x2_t v) {
|
||||
vdup_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vdup_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vdup_lane_s32(v, 0);
|
||||
vdup_lane_s32(v, 1);
|
||||
}
|
||||
|
||||
void test_vdupq_lane(int32x2_t v) {
|
||||
vdupq_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vdupq_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vdupq_lane_s32(v, 0);
|
||||
vdupq_lane_s32(v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vdup_laneq(int32x4_t v) {
|
||||
vdup_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vdup_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vdup_laneq_s32(v, 0);
|
||||
vdup_laneq_s32(v, 3);
|
||||
}
|
||||
|
||||
void test_vdupq_laneq(int32x4_t v) {
|
||||
vdupq_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vdupq_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vdupq_laneq_s32(v, 0);
|
||||
vdupq_laneq_s32(v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vmla_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vmla_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmla_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmla_lane_s32(a, b, v, 0);
|
||||
vmla_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
|
||||
vmlaq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlaq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlaq_lane_s32(a, b, v, 0);
|
||||
vmlaq_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vmla_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmla_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmla_laneq_s32(a, b, v, 0);
|
||||
vmla_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vmlaq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
|
||||
vmlaq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlaq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlaq_laneq_s32(a, b, v, 0);
|
||||
vmlaq_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
|
||||
vmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlal_high_lane_s32(a, b, v, 0);
|
||||
vmlal_high_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
|
||||
vmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlal_high_laneq_s32(a, b, v, 0);
|
||||
vmlal_high_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlal_lane_s32(a, b, v, 0);
|
||||
vmlal_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlal_laneq_s32(a, b, v, 0);
|
||||
vmlal_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vmls_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vmls_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmls_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmls_lane_s32(a, b, v, 0);
|
||||
vmls_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vmlsq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
|
||||
vmlsq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlsq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlsq_lane_s32(a, b, v, 0);
|
||||
vmlsq_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vmls_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vmls_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmls_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmls_laneq_s32(a, b, v, 0);
|
||||
vmls_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vmlsq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
|
||||
vmlsq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlsq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlsq_laneq_s32(a, b, v, 0);
|
||||
vmlsq_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
|
||||
vmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlsl_high_lane_s32(a, b, v, 0);
|
||||
vmlsl_high_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
|
||||
vmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlsl_high_laneq_s32(a, b, v, 0);
|
||||
vmlsl_high_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmlsl_lane_s32(a, b, v, 0);
|
||||
vmlsl_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmlsl_laneq_s32(a, b, v, 0);
|
||||
vmlsl_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vmull_lane(int32x2_t a, int32x2_t b) {
|
||||
vmull_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmull_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmull_lane_s32(a, b, 0);
|
||||
vmull_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vmull_laneq(int32x2_t a, int32x4_t b) {
|
||||
vmull_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmull_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmull_laneq_s32(a, b, 0);
|
||||
vmull_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
void test_vmull_high_lane(int32x4_t a, int32x2_t b) {
|
||||
vmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vmull_high_lane_s32(a, b, 0);
|
||||
vmull_high_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
void test_vmull_high_laneq(int32x4_t a, int32x4_t b) {
|
||||
vmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vmull_high_laneq_s32(a, b, 0);
|
||||
vmull_high_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
void test_vqdmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
|
||||
vqdmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmlal_high_lane_s32(a, b, v, 0);
|
||||
vqdmlal_high_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vqdmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
|
||||
vqdmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmlal_high_laneq_s32(a, b, v, 0);
|
||||
vqdmlal_high_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqdmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vqdmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmlal_lane_s32(a, b, v, 0);
|
||||
vqdmlal_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqdmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vqdmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmlal_laneq_s32(a, b, v, 0);
|
||||
vqdmlal_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vqdmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
|
||||
vqdmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmlsl_high_lane_s32(a, b, v, 0);
|
||||
vqdmlsl_high_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vqdmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
|
||||
vqdmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmlsl_high_laneq_s32(a, b, v, 0);
|
||||
vqdmlsl_high_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqdmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vqdmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmlsl_lane_s32(a, b, v, 0);
|
||||
vqdmlsl_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqdmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vqdmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmlsl_laneq_s32(a, b, v, 0);
|
||||
vqdmlsl_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqdmulh_lane(int32x2_t a, int32x2_t b) {
|
||||
vqdmulh_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmulh_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmulh_lane_s32(a, b, 0);
|
||||
vqdmulh_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqdmulh_laneq(int32x2_t a, int32x4_t b) {
|
||||
vqdmulh_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmulh_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmulh_laneq_s32(a, b, 0);
|
||||
vqdmulh_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
void test_vqdmulhq_laneq(int32x4_t a, int32x4_t b) {
|
||||
vqdmulhq_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmulhq_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmulhq_laneq_s32(a, b, 0);
|
||||
vqdmulhq_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
void test_vqdmull_high_lane(int32x4_t a, int32x2_t b) {
|
||||
vqdmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmull_high_lane_s32(a, b, 0);
|
||||
vqdmull_high_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
void test_vqdmull_high_laneq(int32x4_t a, int32x4_t b) {
|
||||
vqdmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmull_high_laneq_s32(a, b, 0);
|
||||
vqdmull_high_laneq_s32(a, b, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqdmull_lane(int32x2_t a, int32x2_t v) {
|
||||
vqdmull_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqdmull_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqdmull_lane_s32(a, v, 0);
|
||||
vqdmull_lane_s32(a, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqdmull_laneq(int32x2_t a, int32x4_t v) {
|
||||
vqdmull_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqdmull_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqdmull_laneq_s32(a, v, 0);
|
||||
vqdmull_laneq_s32(a, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqrdmlah_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vqrdmlah_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqrdmlah_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqrdmlah_lane_s32(a, b, v, 0);
|
||||
vqrdmlah_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vqrdmlahq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
|
||||
vqrdmlahq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqrdmlahq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqrdmlahq_lane_s32(a, b, v, 0);
|
||||
vqrdmlahq_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqrdmlah_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vqrdmlah_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmlah_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmlah_laneq_s32(a, b, v, 0);
|
||||
vqrdmlah_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vqrdmlahq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
|
||||
vqrdmlahq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmlahq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmlahq_laneq_s32(a, b, v, 0);
|
||||
vqrdmlahq_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqrdmlsh_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
|
||||
vqrdmlsh_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqrdmlsh_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqrdmlsh_lane_s32(a, b, v, 0);
|
||||
vqrdmlsh_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
void test_vqrdmlshq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
|
||||
vqrdmlshq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqrdmlshq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqrdmlshq_lane_s32(a, b, v, 0);
|
||||
vqrdmlshq_lane_s32(a, b, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqrdmlsh_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
|
||||
vqrdmlsh_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmlsh_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmlsh_laneq_s32(a, b, v, 0);
|
||||
vqrdmlsh_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
|
||||
void test_vqrdmlshq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
|
||||
vqrdmlshq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmlshq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmlshq_laneq_s32(a, b, v, 0);
|
||||
vqrdmlshq_laneq_s32(a, b, v, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
void test_vqrdmulh_lane(int32x2_t a, int32x2_t v) {
|
||||
vqrdmulh_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
|
||||
vqrdmulh_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
|
||||
vqrdmulh_lane_s32(a, v, 0);
|
||||
vqrdmulh_lane_s32(a, v, 1);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
void test_vqrdmulh_laneq(int32x2_t a, int32x4_t v) {
|
||||
vqrdmulh_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmulh_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmulh_laneq_s32(a, v, 0);
|
||||
vqrdmulh_laneq_s32(a, v, 3);
|
||||
}
|
||||
|
||||
void test_vqrdmulhq_laneq(int32x4_t a, int32x4_t v) {
|
||||
vqrdmulhq_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
|
||||
vqrdmulhq_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
|
||||
vqrdmulhq_laneq_s32(a, v, 0);
|
||||
vqrdmulhq_laneq_s32(a, v, 3);
|
||||
}
|
||||
#endif
|
|
@ -773,19 +773,23 @@ float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: test_vmul_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <4 x half> [[MUL]]
|
||||
float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
|
||||
return vmul_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmulq_lane_f16
|
||||
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
|
||||
// CHECK: ret <8 x half> [[MUL]]
|
||||
float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
|
||||
return vmulq_lane_f16(a, b, 7);
|
||||
return vmulq_lane_f16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vmul_n_f16
|
||||
|
@ -939,17 +943,21 @@ float16x8_t test_vdupq_n_f16(float16_t a) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: test_vdup_lane_f16
|
||||
// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <4 x half> [[SHFL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <4 x half> [[LANE]]
|
||||
float16x4_t test_vdup_lane_f16(float16x4_t a) {
|
||||
return vdup_lane_f16(a, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_vdupq_lane_f16
|
||||
// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK: ret <8 x half> [[SHFL]]
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
// CHECK: ret <8 x half> [[LANE]]
|
||||
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
|
||||
return vdupq_lane_f16(a, 7);
|
||||
return vdupq_lane_f16(a, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vext_f16(
|
||||
|
|
|
@ -28,7 +28,9 @@ float32x4_t test_vdupq_n_f32(float32_t w) {
|
|||
// this was in <rdar://problem/11778405>, but had already been implemented,
|
||||
// test anyway
|
||||
// CHECK-LABEL: define <2 x double> @test_vdupq_lane_f64(<1 x double> %V) #0 {
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %V, <1 x double> %V, <2 x i32> zeroinitializer
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %V to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
|
||||
// CHECK: ret <2 x double> [[SHUFFLE]]
|
||||
float64x2_t test_vdupq_lane_f64(float64x1_t V) {
|
||||
return vdupq_lane_f64(V, 0);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -239,6 +239,11 @@ public:
|
|||
NumVectors = 1;
|
||||
}
|
||||
|
||||
void make32BitElement() {
|
||||
assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
|
||||
ElementBitwidth = 32;
|
||||
}
|
||||
|
||||
void doubleLanes() {
|
||||
assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
|
||||
Bitwidth = 128;
|
||||
|
@ -1496,6 +1501,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
|
|||
castToType.doubleLanes();
|
||||
} else if (SI->getAsUnquotedString() == "8") {
|
||||
castToType.makeInteger(8, true);
|
||||
} else if (SI->getAsUnquotedString() == "32") {
|
||||
castToType.make32BitElement();
|
||||
} else {
|
||||
castToType = Type::fromTypedefName(SI->getAsUnquotedString());
|
||||
assert_with_loc(!castToType.isVoid(), "Unknown typedef");
|
||||
|
|
Loading…
Reference in New Issue