[ARM] Enabling range checks on Neon intrinsics' lane arguments

Summary:
Range checks were not properly performed in the lane arguments of Neon
intrinsics implemented based on splat operations. Calls to those
intrinsics were translated to `__builtin_shufflevector` calls directly
by the preprocessor through the arm_neon.h macros, missing the chance
to perform the proper range checks.

This patch enables the range checks by introducing an auxiliary splat
instruction in arm_neon.td, delaying the translation into shufflevector
calls until CGBuiltin.cpp in clang, after the checks have been performed.

Reviewers: jmolloy, t.p.northover, rsmith, olista01, ostannard

Reviewed By: ostannard

Subscribers: ostannard, dnsampaio, danielkiss, kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74619
This commit is contained in:
Lucas Prates 2020-03-05 16:45:03 +00:00
parent d42711625a
commit f56550cf7f
13 changed files with 2441 additions and 1244 deletions

View File

@ -51,39 +51,39 @@ def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>;
def OP_FMLS_N : Op<(call "vfma", $p0, (op "-", $p1), (dup $p2))>;
def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
def OP_MUL_LN : Op<(op "*", $p0, (splat $p1, $p2))>;
def OP_MULX_LN : Op<(call "vmulx", $p0, (splat $p1, $p2))>;
def OP_MUL_LN : Op<(op "*", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_MULX_LN : Op<(call "vmulx", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_MULL_N : Op<(call "vmull", $p0, (dup $p1))>;
def OP_MULL_LN : Op<(call "vmull", $p0, (splat $p1, $p2))>;
def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (splat $p1, $p2))>;
def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (splat $p2, $p3)))>;
def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (splat $p2, $p3)))>;
def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
def OP_MULL_LN : Op<(call "vmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (call_mangled "splat_lane", $p1, $p2))>;
def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_MLALHi_LN: Op<(op "+", $p0, (call "vmull", (call "vget_high", $p1),
(splat $p2, $p3)))>;
def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (splat $p2, $p3)))>;
(call_mangled "splat_lane", $p2, $p3)))>;
def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_MLSLHi_LN : Op<(op "-", $p0, (call "vmull", (call "vget_high", $p1),
(splat $p2, $p3)))>;
(call_mangled "splat_lane", $p2, $p3)))>;
def OP_QDMULL_N : Op<(call "vqdmull", $p0, (dup $p1))>;
def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (splat $p1, $p2))>;
def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_QDMULLHi_LN : Op<(call "vqdmull", (call "vget_high", $p0),
(splat $p1, $p2))>;
(call_mangled "splat_lane", $p1, $p2))>;
def OP_QDMLAL_N : Op<(call "vqdmlal", $p0, $p1, (dup $p2))>;
def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (splat $p2, $p3))>;
def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
def OP_QDMLALHi_LN : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
(splat $p2, $p3))>;
(call_mangled "splat_lane", $p2, $p3))>;
def OP_QDMLSL_N : Op<(call "vqdmlsl", $p0, $p1, (dup $p2))>;
def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (splat $p2, $p3))>;
def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
(splat $p2, $p3))>;
(call_mangled "splat_lane", $p2, $p3))>;
def OP_QDMULH_N : Op<(call "vqdmulh", $p0, (dup $p1))>;
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
def OP_QRDMULH_N : Op<(call "vqrdmulh", $p0, (dup $p1))>;
def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
def OP_FMS_LN : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
@ -115,7 +115,7 @@ def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
def OP_DUP : Op<(dup $p0)>;
def OP_DUP_LN : Op<(splat $p0, $p1)>;
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
def OP_SEL : Op<(cast "R", (op "|",
(op "&", $p0, (cast $p0, $p1)),
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
@ -207,10 +207,10 @@ def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t",
def OP_DOT_LN
: Op<(call "vdot", $p0, $p1,
(bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>;
(bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
def OP_DOT_LNQ
: Op<(call "vdot", $p0, $p1,
(bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>;
(bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
def OP_FMLAL_LN : Op<(call "vfmlal_low", $p0, $p1,
(dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
@ -222,7 +222,19 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
(dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
//===----------------------------------------------------------------------===//
// Instructions
// Auxiliary Instructions
//===----------------------------------------------------------------------===//
// Splat operation - performs a range-checked splat over a vector
def SPLAT : WInst<"splat_lane", ".(!q)I",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
let isLaneQ = 1;
}
//===----------------------------------------------------------------------===//
// Intrinsics
//===----------------------------------------------------------------------===//
////////////////////////////////////////////////////////////////////////////////

View File

@ -88,6 +88,7 @@ def call_mangled;
// - "D" - Double the number of lanes in the type.
// - "8" - Convert type to an equivalent vector of 8-bit signed
// integers.
// - "32" - Convert type to an equivalent vector of 32-bit integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
// value is of type "int32x4_t".
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
@ -109,12 +110,6 @@ def dup;
// example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}"
// (assuming __p1 is float16x4_t, and __p2 is a compatible scalar).
def dup_typed;
// splat - Take a vector and a lane index, and return a vector of the same type
// containing repeated instances of the source vector at the lane index.
// example: (splat $p0, $p1) ->
// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
// (assuming __p0 has four elements).
def splat;
// save_temp - Create a temporary (local) variable. The variable takes a name
// based on the zero'th parameter and can be referenced using
// using that name in subsequent DAGs in the same

View File

@ -4495,10 +4495,15 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
}
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
const ElementCount &Count) {
Value *SV = llvm::ConstantVector::getSplat(Count, C);
return Builder.CreateShuffleVector(V, V, SV, "lane");
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
ElementCount EC = V->getType()->getVectorElementCount();
Value *SV = llvm::ConstantVector::getSplat(EC, C);
return Builder.CreateShuffleVector(V, V, SV, "lane");
return EmitNeonSplat(V, C, EC);
}
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
@ -4605,6 +4610,10 @@ struct ARMVectorIntrinsicInfo {
TypeModifier }
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
NEONMAP0(splatq_laneq_v),
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP1(vabs_v, arm_neon_vabs, 0),
@ -4886,6 +4895,10 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
};
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
NEONMAP0(splatq_laneq_v),
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP0(vaddhn_v),
@ -5460,6 +5473,19 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
switch (BuiltinID) {
default: break;
case NEON::BI__builtin_neon_splat_lane_v:
case NEON::BI__builtin_neon_splat_laneq_v:
case NEON::BI__builtin_neon_splatq_lane_v:
case NEON::BI__builtin_neon_splatq_laneq_v: {
auto NumElements = VTy->getElementCount();
if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
NumElements = NumElements * 2;
if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
NumElements = NumElements / 2;
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
}
case NEON::BI__builtin_neon_vpadd_v:
case NEON::BI__builtin_neon_vpaddq_v:
// We don't allow fp/int overloading of intrinsics.

View File

@ -3894,6 +3894,8 @@ public:
SmallVectorImpl<llvm::Value*> &O,
const char *name,
unsigned shift = 0, bool rightshift = false);
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
const llvm::ElementCount &Count);
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
bool negateForRightShift);

File diff suppressed because it is too large Load Diff

View File

@ -69,144 +69,177 @@ float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
}
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmla_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlaq_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmla_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlaq_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmls_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlsq_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmls_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlsq_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmla_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlaq_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmla_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlaq_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmls_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
//
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlsq_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmls_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlsq_laneq_f32(a, b, v, 3);
}

View File

@ -150,22 +150,28 @@ poly64x2_t test_vmovq_n_p64(poly64_t a) {
}
// CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
return vdup_lane_p64(vec, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
return vdupq_lane_p64(vec, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[LANE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
return vdupq_laneq_p64(vec, 1);
}

View File

@ -1086,32 +1086,40 @@ float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
}
// CHECK-LABEL: test_vmul_lane_f16
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x half> [[MUL]]
float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
return vmul_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmulq_lane_f16
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x half> [[MUL]]
float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
return vmulq_lane_f16(a, b, 7);
return vmulq_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmul_laneq_f16
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x half> [[MUL]]
float16x4_t test_vmul_laneq_f16(float16x4_t a, float16x8_t b) {
return vmul_laneq_f16(a, b, 7);
}
// CHECK-LABEL: test_vmulq_laneq_f16
// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x half> [[MUL]]
float16x8_t test_vmulq_laneq_f16(float16x8_t a, float16x8_t b) {
return vmulq_laneq_f16(a, b, 7);
@ -1165,33 +1173,49 @@ float16_t test_vmulh_laneq_f16(float16_t a, float16x8_t b) {
}
// CHECK-LABEL: test_vmulx_lane_f16
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
// CHECK: ret <4 x half> [[MUL]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]]) #4
// CHECK: ret <4 x half> [[VMULX2_I]]
float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
return vmulx_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmulxq_lane_f16
// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
// CHECK: ret <8 x half> [[MUL]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]]) #4
// CHECK: ret <8 x half> [[VMULX2_I]]
float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
return vmulxq_lane_f16(a, b, 7);
return vmulxq_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmulx_laneq_f16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]]) #4
// CHECK: ret <4 x half> [[VMULX2_I]]
// Lane 7 is valid here: the laneq variant reads from an 8 x f16 vector.
float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
  return vmulx_laneq_f16(a, b, 7);
}
// CHECK-LABEL: test_vmulxq_laneq_f16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]]) #4
// CHECK: ret <8 x half> [[VMULX2_I]]
// Lane 7 is valid here: the laneq variant reads from an 8 x f16 vector.
float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
  return vmulxq_laneq_f16(a, b, 7);
}
@ -1473,17 +1497,21 @@ float16x8_t test_vdupq_n_f16(float16_t a) {
}
// CHECK-LABEL: test_vdup_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x half> [[LANE]]
// Lane 3 is the highest valid index for a 4 x f16 (64-bit) vector.
float16x4_t test_vdup_lane_f16(float16x4_t a) {
  return vdup_lane_f16(a, 3);
}
// CHECK-LABEL: test_vdupq_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x half> [[LANE]]
// The input is a 4 x f16 vector, so the maximum valid lane is 3; the
// pre-patch value 7 is now rejected by the lane range check.
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
  return vdupq_lane_f16(a, 3);
}
// CHECK-LABEL: @test_vext_f16(

View File

@ -0,0 +1,410 @@
// RUN: %clang_cc1 -triple arm64-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
// RUN: %clang_cc1 -triple armv8.1a-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
#include <arm_neon.h>
// Range checks on constant lane arguments of splat-based Neon intrinsics:
// a lane index outside the input vector's lane count must produce a clang
// diagnostic instead of being silently lowered to a shufflevector.

// _lane variants below read from a 2-lane (64-bit i32) vector: valid [0, 1].
void test_vdot_lane(int32x2_t r, int8x8_t a, int8x8_t b) {
  vdot_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vdot_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vdot_lane_s32(r, a, b, 0);
  vdot_lane_s32(r, a, b, 1);
}
void test_vdotq_lane(int32x4_t r, int8x16_t a, int8x8_t b) {
  vdotq_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vdotq_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vdotq_lane_s32(r, a, b, 0);
  vdotq_lane_s32(r, a, b, 1);
}
// AArch64-only _laneq variants read from a 4-lane (128-bit) vector: [0, 3].
#if defined(__aarch64__)
void test_vdot_laneq(int32x2_t r, int8x8_t a, int8x16_t b) {
  vdot_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vdot_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vdot_laneq_s32(r, a, b, 0);
  vdot_laneq_s32(r, a, b, 3);
}
void test_vdotq_laneq(int32x4_t r, int8x16_t a, int8x16_t b) {
  vdotq_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vdotq_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vdotq_laneq_s32(r, a, b, 0);
  vdotq_laneq_s32(r, a, b, 3);
}
#endif
// vdup duplicates a single lane; same [0, 1] bound for the 2-lane source.
void test_vdup_lane(int32x2_t v) {
  vdup_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vdup_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vdup_lane_s32(v, 0);
  vdup_lane_s32(v, 1);
}
void test_vdupq_lane(int32x2_t v) {
  vdupq_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vdupq_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vdupq_lane_s32(v, 0);
  vdupq_lane_s32(v, 1);
}
#if defined(__aarch64__)
void test_vdup_laneq(int32x4_t v) {
  vdup_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vdup_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vdup_laneq_s32(v, 0);
  vdup_laneq_s32(v, 3);
}
void test_vdupq_laneq(int32x4_t v) {
  vdupq_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vdupq_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vdupq_laneq_s32(v, 0);
  vdupq_laneq_s32(v, 3);
}
#endif
// Multiply-accumulate by lane: the lane bound follows the v operand's width.
void test_vmla_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
  vmla_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmla_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmla_lane_s32(a, b, v, 0);
  vmla_lane_s32(a, b, v, 1);
}
void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
  vmlaq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlaq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlaq_lane_s32(a, b, v, 0);
  vmlaq_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
  vmla_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmla_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmla_laneq_s32(a, b, v, 0);
  vmla_laneq_s32(a, b, v, 3);
}
void test_vmlaq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
  vmlaq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlaq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlaq_laneq_s32(a, b, v, 0);
  vmlaq_laneq_s32(a, b, v, 3);
}
void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
  vmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlal_high_lane_s32(a, b, v, 0);
  vmlal_high_lane_s32(a, b, v, 1);
}
void test_vmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
  vmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlal_high_laneq_s32(a, b, v, 0);
  vmlal_high_laneq_s32(a, b, v, 3);
}
#endif
void test_vmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
  vmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlal_lane_s32(a, b, v, 0);
  vmlal_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
  vmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlal_laneq_s32(a, b, v, 0);
  vmlal_laneq_s32(a, b, v, 3);
}
#endif
// Multiply-subtract and widening-multiply lane variants; the valid lane
// range is [0, 1] for a 2-lane v operand and [0, 3] for a 4-lane one.
void test_vmls_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
  vmls_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmls_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmls_lane_s32(a, b, v, 0);
  vmls_lane_s32(a, b, v, 1);
}
void test_vmlsq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
  vmlsq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlsq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlsq_lane_s32(a, b, v, 0);
  vmlsq_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vmls_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
  vmls_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmls_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmls_laneq_s32(a, b, v, 0);
  vmls_laneq_s32(a, b, v, 3);
}
void test_vmlsq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
  vmlsq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlsq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlsq_laneq_s32(a, b, v, 0);
  vmlsq_laneq_s32(a, b, v, 3);
}
void test_vmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
  vmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlsl_high_lane_s32(a, b, v, 0);
  vmlsl_high_lane_s32(a, b, v, 1);
}
void test_vmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
  vmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlsl_high_laneq_s32(a, b, v, 0);
  vmlsl_high_laneq_s32(a, b, v, 3);
}
#endif
void test_vmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
  vmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmlsl_lane_s32(a, b, v, 0);
  vmlsl_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
  vmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmlsl_laneq_s32(a, b, v, 0);
  vmlsl_laneq_s32(a, b, v, 3);
}
#endif
void test_vmull_lane(int32x2_t a, int32x2_t b) {
  vmull_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmull_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmull_lane_s32(a, b, 0);
  vmull_lane_s32(a, b, 1);
}
#if defined(__aarch64__)
void test_vmull_laneq(int32x2_t a, int32x4_t b) {
  vmull_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmull_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmull_laneq_s32(a, b, 0);
  vmull_laneq_s32(a, b, 3);
}
void test_vmull_high_lane(int32x4_t a, int32x2_t b) {
  vmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vmull_high_lane_s32(a, b, 0);
  vmull_high_lane_s32(a, b, 1);
}
void test_vmull_high_laneq(int32x4_t a, int32x4_t b) {
  vmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vmull_high_laneq_s32(a, b, 0);
  vmull_high_laneq_s32(a, b, 3);
}
void test_vqdmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
  vqdmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmlal_high_lane_s32(a, b, v, 0);
  vqdmlal_high_lane_s32(a, b, v, 1);
}
void test_vqdmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
  vqdmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmlal_high_laneq_s32(a, b, v, 0);
  vqdmlal_high_laneq_s32(a, b, v, 3);
}
#endif
void test_vqdmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
  vqdmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmlal_lane_s32(a, b, v, 0);
  vqdmlal_lane_s32(a, b, v, 1);
}
// Saturating-doubling multiply variants; lane bound tracks the width of the
// vector the lane is read from ([0, 1] for 2 lanes, [0, 3] for 4 lanes).
#if defined(__aarch64__)
void test_vqdmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
  vqdmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmlal_laneq_s32(a, b, v, 0);
  vqdmlal_laneq_s32(a, b, v, 3);
}
void test_vqdmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
  vqdmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmlsl_high_lane_s32(a, b, v, 0);
  vqdmlsl_high_lane_s32(a, b, v, 1);
}
void test_vqdmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
  vqdmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmlsl_high_laneq_s32(a, b, v, 0);
  vqdmlsl_high_laneq_s32(a, b, v, 3);
}
#endif
void test_vqdmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
  vqdmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmlsl_lane_s32(a, b, v, 0);
  vqdmlsl_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vqdmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
  vqdmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmlsl_laneq_s32(a, b, v, 0);
  vqdmlsl_laneq_s32(a, b, v, 3);
}
#endif
void test_vqdmulh_lane(int32x2_t a, int32x2_t b) {
  vqdmulh_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmulh_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmulh_lane_s32(a, b, 0);
  vqdmulh_lane_s32(a, b, 1);
}
#if defined(__aarch64__)
void test_vqdmulh_laneq(int32x2_t a, int32x4_t b) {
  vqdmulh_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmulh_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmulh_laneq_s32(a, b, 0);
  vqdmulh_laneq_s32(a, b, 3);
}
void test_vqdmulhq_laneq(int32x4_t a, int32x4_t b) {
  vqdmulhq_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmulhq_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmulhq_laneq_s32(a, b, 0);
  vqdmulhq_laneq_s32(a, b, 3);
}
void test_vqdmull_high_lane(int32x4_t a, int32x2_t b) {
  vqdmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmull_high_lane_s32(a, b, 0);
  vqdmull_high_lane_s32(a, b, 1);
}
void test_vqdmull_high_laneq(int32x4_t a, int32x4_t b) {
  vqdmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmull_high_laneq_s32(a, b, 0);
  vqdmull_high_laneq_s32(a, b, 3);
}
#endif
void test_vqdmull_lane(int32x2_t a, int32x2_t v) {
  vqdmull_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqdmull_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqdmull_lane_s32(a, v, 0);
  vqdmull_lane_s32(a, v, 1);
}
#if defined(__aarch64__)
void test_vqdmull_laneq(int32x2_t a, int32x4_t v) {
  vqdmull_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqdmull_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqdmull_laneq_s32(a, v, 0);
  vqdmull_laneq_s32(a, v, 3);
}
#endif
// Rounding saturating-doubling multiply (v8.1a) variants; same lane-range
// pattern: [0, 1] for a 2-lane source, [0, 3] for a 4-lane source.
void test_vqrdmlah_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
  vqrdmlah_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqrdmlah_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqrdmlah_lane_s32(a, b, v, 0);
  vqrdmlah_lane_s32(a, b, v, 1);
}
void test_vqrdmlahq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
  vqrdmlahq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqrdmlahq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqrdmlahq_lane_s32(a, b, v, 0);
  vqrdmlahq_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vqrdmlah_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
  vqrdmlah_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmlah_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmlah_laneq_s32(a, b, v, 0);
  vqrdmlah_laneq_s32(a, b, v, 3);
}
void test_vqrdmlahq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
  vqrdmlahq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmlahq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmlahq_laneq_s32(a, b, v, 0);
  vqrdmlahq_laneq_s32(a, b, v, 3);
}
#endif
void test_vqrdmlsh_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
  vqrdmlsh_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqrdmlsh_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqrdmlsh_lane_s32(a, b, v, 0);
  vqrdmlsh_lane_s32(a, b, v, 1);
}
void test_vqrdmlshq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
  vqrdmlshq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqrdmlshq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqrdmlshq_lane_s32(a, b, v, 0);
  vqrdmlshq_lane_s32(a, b, v, 1);
}
#if defined(__aarch64__)
void test_vqrdmlsh_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
  vqrdmlsh_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmlsh_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmlsh_laneq_s32(a, b, v, 0);
  vqrdmlsh_laneq_s32(a, b, v, 3);
}
void test_vqrdmlshq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
  vqrdmlshq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmlshq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmlshq_laneq_s32(a, b, v, 0);
  vqrdmlshq_laneq_s32(a, b, v, 3);
}
#endif
void test_vqrdmulh_lane(int32x2_t a, int32x2_t v) {
  vqrdmulh_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
  vqrdmulh_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
  vqrdmulh_lane_s32(a, v, 0);
  vqrdmulh_lane_s32(a, v, 1);
}
#if defined(__aarch64__)
void test_vqrdmulh_laneq(int32x2_t a, int32x4_t v) {
  vqrdmulh_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmulh_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmulh_laneq_s32(a, v, 0);
  vqrdmulh_laneq_s32(a, v, 3);
}
void test_vqrdmulhq_laneq(int32x4_t a, int32x4_t v) {
  vqrdmulhq_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
  vqrdmulhq_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
  vqrdmulhq_laneq_s32(a, v, 0);
  vqrdmulhq_laneq_s32(a, v, 3);
}
#endif

View File

@ -773,19 +773,23 @@ float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
}
// CHECK-LABEL: test_vmul_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x half> [[MUL]]
// Lane 3 is the highest valid index for a 4 x f16 (64-bit) vector.
float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
  return vmul_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmulq_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x half> [[MUL]]
// The second operand is a 4 x f16 vector, so the maximum valid lane is 3;
// the pre-patch value 7 is now rejected by the lane range check.
float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
  return vmulq_lane_f16(a, b, 3);
}
// CHECK-LABEL: test_vmul_n_f16
@ -939,17 +943,21 @@ float16x8_t test_vdupq_n_f16(float16_t a) {
}
// CHECK-LABEL: test_vdup_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x half> [[LANE]]
// Lane 3 is the highest valid index for a 4 x f16 (64-bit) vector.
float16x4_t test_vdup_lane_f16(float16x4_t a) {
  return vdup_lane_f16(a, 3);
}
// CHECK-LABEL: test_vdupq_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x half> [[LANE]]
// The input is a 4 x f16 vector, so the maximum valid lane is 3; the
// pre-patch value 7 is now rejected by the lane range check.
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
  return vdupq_lane_f16(a, 3);
}
// CHECK-LABEL: @test_vext_f16(

View File

@ -28,7 +28,9 @@ float32x4_t test_vdupq_n_f32(float32_t w) {
// this was in <rdar://problem/11778405>, but had already been implemented,
// test anyway
// CHECK-LABEL: define <2 x double> @test_vdupq_lane_f64(<1 x double> %V) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %V, <1 x double> %V, <2 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %V to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x double> [[SHUFFLE]]
float64x2_t test_vdupq_lane_f64(float64x1_t V) {
return vdupq_lane_f64(V, 0);

File diff suppressed because it is too large Load Diff

View File

@ -239,6 +239,11 @@ public:
NumVectors = 1;
}
// Forces the element width of this type to 32 bits; used by the "32" cast
// operation in emitDagCast so splat lane indices can be emitted as i32.
void make32BitElement() {
  // NOTE(review): the guard reads the total vector Bitwidth while the
  // assignment writes ElementBitwidth — presumably any vector wider than
  // 32 bits can hold at least one 32-bit element; confirm this is intended.
  assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
  ElementBitwidth = 32;
}
void doubleLanes() {
assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
Bitwidth = 128;
@ -1496,6 +1501,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
castToType.doubleLanes();
} else if (SI->getAsUnquotedString() == "8") {
castToType.makeInteger(8, true);
} else if (SI->getAsUnquotedString() == "32") {
castToType.make32BitElement();
} else {
castToType = Type::fromTypedefName(SI->getAsUnquotedString());
assert_with_loc(!castToType.isVoid(), "Unknown typedef");