forked from OSchip/llvm-project
[ARM,MVE] Add the `vshlcq` intrinsics.
Summary: The VSHLC instruction performs a left shift of a whole vector register by an immediate shift count up to 32, shifting in new bits at the low end from a GPR and delivering the shifted-out bits from the high end back into the same GPR. Since the instruction produces two outputs (the shifted vector register and the output GPR of shifted-out bits), it has to be instruction-selected in C++ rather than Tablegen. Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75445
This commit is contained in:
parent
810127f6ab
commit
068b2f313c
|
@ -1166,6 +1166,22 @@ let params = T.Int32 in {
|
|||
defm vsbc: vadcsbc;
|
||||
}
|
||||
|
||||
// vshlcq / vshlcq_m: source-level intrinsics for the MVE VSHLC instruction,
// instantiated for the types in T.Int.
//
// Both take the vector $v, a pointer $ps to a 32-bit word, and an immediate
// shift count $imm (operand class imm_1to32; its definition is outside this
// excerpt - presumably it restricts the value to 1..32, TODO confirm).
// The generated code sequence is:
//   1. load the word at $ps (bound to $s),
//   2. call the IR intrinsic with $v, $s and $imm (result pair bound to $pair),
//   3. store element 0 of the pair back through $ps,
//   4. yield element 1 of the pair as the intrinsic's Vector return value.
let params = T.Int in {
|
||||
// Unpredicated form; uses the IR intrinsic named "vshlc", overloaded on the
// vector type only.
def vshlcq: Intrinsic<
|
||||
Vector, (args Vector:$v, Ptr<u32>:$ps, imm_1to32:$imm),
|
||||
(seq (load $ps):$s,
|
||||
(IRInt<"vshlc", [Vector]> $v, $s, $imm):$pair,
|
||||
(store (xval $pair, 0), $ps),
|
||||
(xval $pair, 1))>;
|
||||
// Predicated form; takes an extra Predicate argument $pred and uses the IR
// intrinsic named "vshlc_predicated", overloaded on both the vector type and
// the predicate type.
def vshlcq_m: Intrinsic<
|
||||
Vector, (args Vector:$v, Ptr<u32>:$ps, imm_1to32:$imm, Predicate:$pred),
|
||||
(seq (load $ps):$s,
|
||||
(IRInt<"vshlc_predicated", [Vector, Predicate]>
|
||||
$v, $s, $imm, $pred):$pair,
|
||||
(store (xval $pair, 0), $ps),
|
||||
(xval $pair, 1))>;
|
||||
}
|
||||
|
||||
multiclass VectorComplexAddPred<dag not_halving, dag angle> {
|
||||
def "" : Intrinsic<Vector, (args Vector:$a, Vector:$b),
|
||||
(IRInt<"vcaddq", [Vector]> not_halving, angle, $a, $b)>;
|
||||
|
|
|
@ -0,0 +1,221 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
||||
|
||||
#include <arm_mve.h>
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 18)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
// vshlcq on int8x16_t with shift count 18. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_s8.
int8x16_t test_vshlcq_s8(int8x16_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 18);
#else /* !POLYMORPHIC */
  return vshlcq_s8(a, b, 18);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 16)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
// vshlcq on int16x8_t with shift count 16. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_s16.
int16x8_t test_vshlcq_s16(int16x8_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 16);
#else /* !POLYMORPHIC */
  return vshlcq_s16(a, b, 16);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 4)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
|
||||
//
|
||||
// vshlcq on int32x4_t with shift count 4. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_s32.
int32x4_t test_vshlcq_s32(int32x4_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 4);
#else /* !POLYMORPHIC */
  return vshlcq_s32(a, b, 4);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 17)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
|
||||
//
|
||||
// vshlcq on uint8x16_t with shift count 17. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_u8.
uint8x16_t test_vshlcq_u8(uint8x16_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 17);
#else /* !POLYMORPHIC */
  return vshlcq_u8(a, b, 17);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
|
||||
//
|
||||
// vshlcq on uint16x8_t with shift count 17. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_u16.
uint16x8_t test_vshlcq_u16(uint16x8_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 17);
#else /* !POLYMORPHIC */
  return vshlcq_u16(a, b, 17);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 20)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP2]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
|
||||
//
|
||||
// vshlcq on uint32x4_t with shift count 20. When POLYMORPHIC is defined the
// overloaded spelling is used; otherwise the explicitly typed vshlcq_u32.
uint32x4_t test_vshlcq_u32(uint32x4_t a, uint32_t *b) {
#if defined(POLYMORPHIC)
  return vshlcq(a, b, 20);
#else /* !POLYMORPHIC */
  return vshlcq_u32(a, b, 20);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 29, <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on int8x16_t with shift count 29 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_s8.
int8x16_t test_vshlcq_m_s8(int8x16_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 29, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_s8(a, b, 29, p);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17, <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on int16x8_t with shift count 17 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_s16.
int16x8_t test_vshlcq_m_s16(int16x8_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 17, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_s16(a, b, 17, p);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 9, <4 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on int32x4_t with shift count 9 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_s32.
int32x4_t test_vshlcq_m_s32(int32x4_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 9, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_s32(a, b, 9, p);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 21, <16 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on uint8x16_t with shift count 21 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_u8.
uint8x16_t test_vshlcq_m_u8(uint8x16_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 21, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_u8(a, b, 21, p);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 24, <8 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on uint16x8_t with shift count 24 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_u16.
uint16x8_t test_vshlcq_m_u16(uint16x8_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 24, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_u16(a, b, 24, p);
#endif /* POLYMORPHIC */
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlcq_m_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 26, <4 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
|
||||
// CHECK-NEXT: store i32 [[TMP4]], i32* [[B]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
|
||||
//
|
||||
// Predicated vshlcq_m on uint32x4_t with shift count 26 and predicate p. When
// POLYMORPHIC is defined the overloaded spelling is used; otherwise the
// explicitly typed vshlcq_m_u32.
uint32x4_t test_vshlcq_m_u32(uint32x4_t a, uint32_t *b, mve_pred16_t p) {
#if defined(POLYMORPHIC)
  return vshlcq_m(a, b, 26, p);
#else /* !POLYMORPHIC */
  return vshlcq_m_u32(a, b, 26, p);
#endif /* POLYMORPHIC */
}
|
|
@ -1031,6 +1031,14 @@ def int_arm_mve_vsbc_predicated: Intrinsic<
|
|||
[llvm_anyvector_ty, llvm_i32_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
|
||||
// Unpredicated VSHLC IR intrinsic.
// Results: an i32 holding the bits shifted out, and the result vector (the
// single overloaded type of this intrinsic).
// Operands: the input vector (same type as the result vector), an i32 holding
// the bits to shift in, and an i32 shift count.
// Declared with the IntrNoMem property.
def int_arm_mve_vshlc: Intrinsic<
|
||||
[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
|
||||
llvm_i32_ty /* shift count */], [IntrNoMem]>;
|
||||
// Predicated VSHLC IR intrinsic.
// Results: an i32 holding the bits shifted out, and the result vector (first
// overloaded type).
// Operands: the input vector (same type as the result vector), an i32 holding
// the bits to shift in, an i32 shift count, and a trailing vector operand that
// is a second, independently overloaded type - the predicate.
// Declared with the IntrNoMem property.
def int_arm_mve_vshlc_predicated: Intrinsic<
|
||||
[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
|
||||
llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
|
||||
def int_arm_mve_vmulh: Intrinsic<
|
||||
[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
|
||||
|
|
|
@ -237,6 +237,10 @@ private:
|
|||
void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
|
||||
uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
|
||||
|
||||
/// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
|
||||
/// vector lanes.
|
||||
void SelectMVE_VSHLC(SDNode *N, bool Predicated);
|
||||
|
||||
/// Select long MVE vector reductions with two vector operands
|
||||
/// Stride is the number of vector element widths the instruction can operate
|
||||
/// on:
|
||||
|
@ -2569,6 +2573,25 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
|
|||
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
|
||||
}
|
||||
|
||||
/// Morph an arm_mve_vshlc / arm_mve_vshlc_predicated intrinsic node into an
/// ARM::MVE_VSHLC machine node that keeps the node's existing result types.
///
/// \param N          Intrinsic node to select. Operand 1 is the vector input,
///                   operand 2 the 32-bit word of bits to shift in, operand 3
///                   the constant shift count and, for the predicated form,
///                   operand 4 the predicate mask.
/// \param Predicated True when \p N carries a predicate in operand 4.
void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc DL(N);

  // The shift count has to be a constant node; cast<> asserts otherwise. It is
  // re-emitted below as an i32 immediate operand.
  int32_t ShiftCount = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();

  // Machine-node operand order: vector input, bits to shift in, shift count,
  // then the predicate operands appended by the helper chosen below.
  SmallVector<SDValue, 8> Operands = {N->getOperand(1), N->getOperand(2),
                                      getI32Imm(ShiftCount, DL)};

  if (!Predicated)
    AddEmptyMVEPredicateToOps(Operands, DL);
  else
    AddMVEPredicateToOps(Operands, DL, N->getOperand(4));

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(),
                       makeArrayRef(Operands));
}
|
||||
|
||||
static bool SDValueToConstBool(SDValue SDVal) {
|
||||
assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
|
||||
ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
|
||||
|
@ -4593,6 +4616,10 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
|
||||
IntNo == Intrinsic::arm_mve_vsbc_predicated);
|
||||
return;
|
||||
case Intrinsic::arm_mve_vshlc:
|
||||
case Intrinsic::arm_mve_vshlc_predicated:
|
||||
SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
|
||||
return;
|
||||
|
||||
case Intrinsic::arm_mve_vmlldava:
|
||||
case Intrinsic::arm_mve_vmlldava_predicated: {
|
||||
|
|
|
@ -0,0 +1,228 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
; Unpredicated vshlc on <16 x i8> with shift count 18: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <16 x i8> @test_vshlcq_s8(<16 x i8> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #18
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> %a, i32 %carry.in, i32 18)
  %carry.out = extractvalue { i32, <16 x i8> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <16 x i8> } %res, 1
  ret <16 x i8> %shifted
}
|
||||
|
||||
; Unpredicated vshlc on <8 x i16> with shift count 16: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <8 x i16> @test_vshlcq_s16(<8 x i16> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #16
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> %a, i32 %carry.in, i32 16)
  %carry.out = extractvalue { i32, <8 x i16> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <8 x i16> } %res, 1
  ret <8 x i16> %shifted
}
|
||||
|
||||
; Unpredicated vshlc on <4 x i32> with shift count 4: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <4 x i32> @test_vshlcq_s32(<4 x i32> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #4
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> %a, i32 %carry.in, i32 4)
  %carry.out = extractvalue { i32, <4 x i32> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <4 x i32> } %res, 1
  ret <4 x i32> %shifted
}
|
||||
|
||||
; Unpredicated vshlc on <16 x i8> with shift count 17: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <16 x i8> @test_vshlcq_u8(<16 x i8> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #17
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> %a, i32 %carry.in, i32 17)
  %carry.out = extractvalue { i32, <16 x i8> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <16 x i8> } %res, 1
  ret <16 x i8> %shifted
}
|
||||
|
||||
; Unpredicated vshlc on <8 x i16> with shift count 17: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <8 x i16> @test_vshlcq_u16(<8 x i16> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #17
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> %a, i32 %carry.in, i32 17)
  %carry.out = extractvalue { i32, <8 x i16> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <8 x i16> } %res, 1
  ret <8 x i16> %shifted
}
|
||||
|
||||
; Unpredicated vshlc on <4 x i32> with shift count 20: the carry word is loaded
; from %b, passed to the intrinsic, and result element 0 is stored back to %b.
define arm_aapcs_vfpcc <4 x i32> @test_vshlcq_u32(<4 x i32> %a, i32* nocapture %b) {
; CHECK-LABEL: test_vshlcq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vshlc q0, r1, #20
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %res = tail call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> %a, i32 %carry.in, i32 20)
  %carry.out = extractvalue { i32, <4 x i32> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <4 x i32> } %res, 1
  ret <4 x i32> %shifted
}
|
||||
|
||||
; Predicated vshlc on <16 x i8> with shift count 29: %p is zero-extended and
; converted to a <16 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <16 x i8> @test_vshlcq_m_s8(<16 x i8> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #29
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> %a, i32 %carry.in, i32 29, <16 x i1> %pred)
  %carry.out = extractvalue { i32, <16 x i8> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <16 x i8> } %res, 1
  ret <16 x i8> %shifted
}
|
||||
|
||||
; Predicated vshlc on <8 x i16> with shift count 17: %p is zero-extended and
; converted to an <8 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <8 x i16> @test_vshlcq_m_s16(<8 x i16> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #17
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> %a, i32 %carry.in, i32 17, <8 x i1> %pred)
  %carry.out = extractvalue { i32, <8 x i16> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <8 x i16> } %res, 1
  ret <8 x i16> %shifted
}
|
||||
|
||||
; Predicated vshlc on <4 x i32> with shift count 9: %p is zero-extended and
; converted to a <4 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <4 x i32> @test_vshlcq_m_s32(<4 x i32> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #9
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> %a, i32 %carry.in, i32 9, <4 x i1> %pred)
  %carry.out = extractvalue { i32, <4 x i32> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <4 x i32> } %res, 1
  ret <4 x i32> %shifted
}
|
||||
|
||||
; Predicated vshlc on <16 x i8> with shift count 21: %p is zero-extended and
; converted to a <16 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <16 x i8> @test_vshlcq_m_u8(<16 x i8> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #21
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> %a, i32 %carry.in, i32 21, <16 x i1> %pred)
  %carry.out = extractvalue { i32, <16 x i8> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <16 x i8> } %res, 1
  ret <16 x i8> %shifted
}
|
||||
|
||||
; Predicated vshlc on <8 x i16> with shift count 24: %p is zero-extended and
; converted to an <8 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <8 x i16> @test_vshlcq_m_u16(<8 x i16> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #24
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> %a, i32 %carry.in, i32 24, <8 x i1> %pred)
  %carry.out = extractvalue { i32, <8 x i16> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <8 x i16> } %res, 1
  ret <8 x i16> %shifted
}
|
||||
|
||||
; Predicated vshlc on <4 x i32> with shift count 26: %p is zero-extended and
; converted to a <4 x i1> predicate, the carry word is loaded from %b, and
; result element 0 of the intrinsic is stored back to %b.
define arm_aapcs_vfpcc <4 x i32> @test_vshlcq_m_u32(<4 x i32> %a, i32* nocapture %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlcq_m_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vshlct q0, r1, #26
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
  %carry.in = load i32, i32* %b, align 4
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> %a, i32 %carry.in, i32 26, <4 x i1> %pred)
  %carry.out = extractvalue { i32, <4 x i32> } %res, 0
  store i32 %carry.out, i32* %b, align 4
  %shifted = extractvalue { i32, <4 x i32> } %res, 1
  ret <4 x i32> %shifted
}
|
||||
|
||||
declare { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8>, i32, i32)
|
||||
declare { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16>, i32, i32)
|
||||
declare { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32>, i32, i32)
|
||||
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
||||
declare { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>)
|
||||
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
||||
declare { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>)
|
||||
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
||||
declare { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>)
|
Loading…
Reference in New Issue