forked from OSchip/llvm-project
[ARM,MVE] Add intrinsics and isel for MVE integer VMLA.
Summary: These instructions compute multiply+add in integers, with one of the operands being a splat of a scalar. (VMLA and VMLAS differ in whether the splat operand is a multiplier or the addend.) I've represented these in IR using existing standard IR operations for the unpredicated forms. The predicated forms are done with target- specific intrinsics, as usual. When operating on n-bit vector lanes, only the bottom n bits of the i32 scalar operand are used. So we have to tell that to isel lowering, to allow it to remove a pointless sign- or zero-extension instruction on that input register. That's done in `PerformIntrinsicCombine`, but first I had to enable `PerformIntrinsicCombine` for MVE targets (previously all the intrinsics it handled were for NEON), and make it a method of `ARMTargetLowering` so that it can get at `SimplifyDemandedBits`. Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76122
This commit is contained in:
parent
8d019cda85
commit
28c5d97bee
|
@ -202,6 +202,24 @@ let params = T.Float in {
|
|||
defm vfms: FMA<0>;
|
||||
}
|
||||
|
||||
let params = T.Int, pnt = PNT_NType in {
|
||||
def vmlaq_n: Intrinsic<
|
||||
Vector, (args Vector:$addend, Vector:$m1, unpromoted<Scalar>:$m2_s),
|
||||
(add (mul $m1, (splat $m2_s)), $addend)>;
|
||||
def vmlasq_n: Intrinsic<
|
||||
Vector, (args Vector:$m1, Vector:$m2, unpromoted<Scalar>:$addend_s),
|
||||
(add (mul $m1, $m2), (splat $addend_s))>;
|
||||
|
||||
def vmlaq_m_n: Intrinsic<
|
||||
Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s, Predicate:$pred),
|
||||
(IRInt<"vmla_n_predicated", [Vector, Predicate]>
|
||||
$addend, $m1, $m2_s, $pred)>;
|
||||
def vmlasq_m_n: Intrinsic<
|
||||
Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s, Predicate:$pred),
|
||||
(IRInt<"vmlas_n_predicated", [Vector, Predicate]>
|
||||
$m1, $m2, $addend_s, $pred)>;
|
||||
}
|
||||
|
||||
let params = !listconcat(T.Int16, T.Int32) in {
|
||||
let pnt = PNT_None in {
|
||||
def vmvnq_n: Intrinsic<Vector, (args imm_simd_vmvn:$imm),
|
||||
|
|
|
@ -124,6 +124,198 @@ float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
|
|||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
||||
//
|
||||
int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_s8(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
|
||||
//
|
||||
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_s16(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
||||
//
|
||||
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_s32(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
||||
//
|
||||
uint8x16_t test_vmlaq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_u8(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
|
||||
//
|
||||
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_u16(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
||||
//
|
||||
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_n_u32(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
||||
//
|
||||
int8x16_t test_vmlasq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_s8(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
|
||||
//
|
||||
int16x8_t test_vmlasq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_s16(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
||||
//
|
||||
int32x4_t test_vmlasq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_s32(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
||||
//
|
||||
uint8x16_t test_vmlasq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_u8(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
|
||||
//
|
||||
uint16x8_t test_vmlasq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_u16(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
||||
//
|
||||
uint32x4_t test_vmlasq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq(a, b, c);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_n_u32(a, b, c);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vfmaq_m_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
|
@ -259,3 +451,191 @@ float32x4_t test_vfmsq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr
|
|||
return vfmsq_m_f32(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_s8(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_s16(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_s32(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_u8(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_u16(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlaq_m_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
uint32x4_t test_vmlaq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlaq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlaq_m_n_u32(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
int8x16_t test_vmlasq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_s8(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
int16x8_t test_vmlasq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_s16(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
int32x4_t test_vmlasq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_s32(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
uint8x16_t test_vmlasq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_u8(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
uint16x8_t test_vmlasq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_u16(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vmlasq_m_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
uint32x4_t test_vmlasq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, mve_pred16_t p) {
|
||||
#ifdef POLYMORPHIC
|
||||
return vmlasq_m(a, b, c, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vmlasq_m_n_u32(a, b, c, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
|
|
@ -1246,6 +1246,14 @@ def int_arm_mve_vqmovn_predicated: Intrinsic<[llvm_anyvector_ty],
|
|||
def int_arm_mve_fma_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
|
||||
LLVMMatchType<0> /* addend */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;
|
||||
def int_arm_mve_vmla_n_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */,
|
||||
llvm_i32_ty /* mult op #2 (scalar) */, llvm_anyvector_ty /* pred */],
|
||||
[IntrNoMem]>;
|
||||
def int_arm_mve_vmlas_n_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
|
||||
llvm_i32_ty /* addend (scalar) */, llvm_anyvector_ty /* pred */],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// CDE (Custom Datapath Extension)
|
||||
|
||||
|
|
|
@ -911,7 +911,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
|
||||
}
|
||||
|
||||
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
|
||||
setTargetDAGCombine(ISD::SHL);
|
||||
setTargetDAGCombine(ISD::SRL);
|
||||
setTargetDAGCombine(ISD::SRA);
|
||||
|
@ -939,6 +938,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
|
||||
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
|
||||
setTargetDAGCombine(ISD::INTRINSIC_VOID);
|
||||
setTargetDAGCombine(ISD::VECREDUCE_ADD);
|
||||
|
@ -14173,7 +14173,9 @@ static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
|
||||
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
|
||||
switch (IntNo) {
|
||||
default:
|
||||
|
@ -14322,6 +14324,19 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
case Intrinsic::arm_neon_vqrshiftu:
|
||||
// No immediate versions of these to check for.
|
||||
break;
|
||||
|
||||
case Intrinsic::arm_mve_vmla_n_predicated:
|
||||
case Intrinsic::arm_mve_vmlas_n_predicated: {
|
||||
// These intrinsics all take an i32 scalar operand which is narrowed to the
|
||||
// size of a single lane of the vector type they return. So we don't need
|
||||
// any bits of that operand above that point, which allows us to eliminate
|
||||
// uxth/sxth.
|
||||
unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
|
||||
APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
|
||||
if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -15041,7 +15056,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::FDIV:
|
||||
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
return PerformIntrinsicCombine(N, DCI);
|
||||
case ISD::SHL:
|
||||
case ISD::SRA:
|
||||
case ISD::SRL:
|
||||
|
|
|
@ -352,6 +352,7 @@ class VectorType;
|
|||
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
|
||||
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
|
||||
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
|
||||
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
|
||||
|
|
|
@ -5152,48 +5152,49 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
|
|||
let validForTailPredication = 1;
|
||||
}
|
||||
|
||||
def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
|
||||
def MVE_VMLA_qr_s16 : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
|
||||
def MVE_VMLA_qr_s32 : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
|
||||
def MVE_VMLA_qr_u8 : MVE_VFMAMLA_qr<"vmla", "u8", 0b1, 0b00, 0b0>;
|
||||
def MVE_VMLA_qr_u16 : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
|
||||
def MVE_VMLA_qr_u32 : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;
|
||||
multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
|
||||
bit scalar_addend> {
|
||||
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
|
||||
scalar_addend>;
|
||||
defvar Inst = !cast<Instruction>(NAME);
|
||||
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
|
||||
defvar v1 = (VTI.Vec MQPR:$v1);
|
||||
defvar v2 = (VTI.Vec MQPR:$v2);
|
||||
defvar vs = (VTI.Vec (ARMvdup rGPR:$s));
|
||||
defvar s = (i32 rGPR:$s);
|
||||
defvar pred = (VTI.Pred VCCR:$pred);
|
||||
|
||||
def MVE_VMLAS_qr_s8 : MVE_VFMAMLA_qr<"vmlas", "s8", 0b0, 0b00, 0b1>;
|
||||
def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
|
||||
def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
|
||||
def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
|
||||
def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
|
||||
def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
|
||||
// The signed and unsigned variants of this instruction have different
|
||||
// encodings, but they're functionally identical. For the sake of
|
||||
// determinism, we generate only the unsigned variant.
|
||||
if VTI.Unsigned then let Predicates = [HasMVEInt] in {
|
||||
if scalar_addend then {
|
||||
def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
|
||||
(VTI.Vec (Inst v1, v2, s))>;
|
||||
} else {
|
||||
def : Pat<(VTI.Vec (add (mul v2, vs), v1)),
|
||||
(VTI.Vec (Inst v1, v2, s))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
|
||||
(v4i32 (mul (v4i32 MQPR:$src2),
|
||||
(v4i32 (ARMvdup (i32 rGPR:$x))))))),
|
||||
(v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
|
||||
def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
|
||||
(v8i16 (mul (v8i16 MQPR:$src2),
|
||||
(v8i16 (ARMvdup (i32 rGPR:$x))))))),
|
||||
(v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
|
||||
def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
|
||||
(v16i8 (mul (v16i8 MQPR:$src2),
|
||||
(v16i8 (ARMvdup (i32 rGPR:$x))))))),
|
||||
(v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
|
||||
|
||||
def : Pat<(v4i32 (add (v4i32 (ARMvdup (i32 rGPR:$x))),
|
||||
(v4i32 (mul (v4i32 MQPR:$src1),
|
||||
(v4i32 MQPR:$src2))))),
|
||||
(v4i32 (MVE_VMLAS_qr_u32 $src1, $src2, $x))>;
|
||||
def : Pat<(v8i16 (add (v8i16 (ARMvdup (i32 rGPR:$x))),
|
||||
(v8i16 (mul (v8i16 MQPR:$src1),
|
||||
(v8i16 MQPR:$src2))))),
|
||||
(v8i16 (MVE_VMLAS_qr_u16 $src1, $src2, $x))>;
|
||||
def : Pat<(v16i8 (add (v16i8 (ARMvdup (i32 rGPR:$x))),
|
||||
(v16i8 (mul (v16i8 MQPR:$src1),
|
||||
(v16i8 MQPR:$src2))))),
|
||||
(v16i8 (MVE_VMLAS_qr_u8 $src1, $src2, $x))>;
|
||||
def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
|
||||
(VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
|
||||
}
|
||||
}
|
||||
|
||||
defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
|
||||
defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
|
||||
defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
|
||||
defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
|
||||
defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
|
||||
defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
|
||||
|
||||
defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
|
||||
defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
|
||||
defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
|
||||
defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
|
||||
defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
|
||||
defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
|
||||
|
||||
multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
|
||||
bit scalar_addend> {
|
||||
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
|
||||
|
|
|
@ -101,6 +101,168 @@ entry:
|
|||
ret <4 x float> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%0 = mul <16 x i8> %.splat, %b
|
||||
%1 = add <16 x i8> %0, %a
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
|
||||
%.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%0 = mul <8 x i16> %.splat, %b
|
||||
%1 = add <8 x i16> %0, %a
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
|
||||
%.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%0 = mul <4 x i32> %.splat, %b
|
||||
%1 = add <4 x i32> %0, %a
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%0 = mul <16 x i8> %.splat, %b
|
||||
%1 = add <16 x i8> %0, %a
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
|
||||
%.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%0 = mul <8 x i16> %.splat, %b
|
||||
%1 = add <8 x i16> %0, %a
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
|
||||
; CHECK-LABEL: test_vmlaq_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmla.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
|
||||
%.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%0 = mul <4 x i32> %.splat, %b
|
||||
%1 = add <4 x i32> %0, %a
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u8 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <16 x i8> %b, %a
|
||||
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%1 = add <16 x i8> %.splat, %0
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u16 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <8 x i16> %b, %a
|
||||
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
|
||||
%.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%1 = add <8 x i16> %.splat, %0
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u32 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <4 x i32> %b, %a
|
||||
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
|
||||
%.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%1 = add <4 x i32> %.splat, %0
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u8 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <16 x i8> %b, %a
|
||||
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%1 = add <16 x i8> %.splat, %0
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u16 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <8 x i16> %b, %a
|
||||
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
|
||||
%.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%1 = add <8 x i16> %.splat, %0
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
|
||||
; CHECK-LABEL: test_vmlasq_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmlas.u32 q1, q0, r0
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = mul <4 x i32> %b, %a
|
||||
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
|
||||
%.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%1 = add <4 x i32> %.splat, %0
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @test_vfmaq_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vfmaq_m_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
|
@ -233,6 +395,183 @@ entry:
|
|||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i8 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
|
||||
%3 = tail call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
|
||||
%3 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i8 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
|
||||
%3 = tail call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
|
||||
%3 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlaq_m_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlat.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i8 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
|
||||
%3 = tail call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
|
||||
%3 = tail call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u8 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i8 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
|
||||
%3 = tail call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u16 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %c to i32
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
|
||||
%3 = tail call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vmlasq_m_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmlast.u32 q0, q1, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
||||
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
||||
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
||||
|
||||
|
@ -240,3 +579,9 @@ declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
|
|||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||
declare <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x half>, <8 x i1>)
|
||||
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>)
|
||||
declare <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
|
||||
declare <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
|
||||
declare <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
|
||||
declare <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
|
||||
declare <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
|
||||
declare <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
|
||||
|
|
Loading…
Reference in New Issue