forked from OSchip/llvm-project
[ARM,CDE] Implement CDE S and D-register intrinsics
Summary: This patch implements the following ACLE intrinsics: uint32_t __arm_vcx1_u32(int coproc, uint32_t imm); uint32_t __arm_vcx1a_u32(int coproc, uint32_t acc, uint32_t imm); uint32_t __arm_vcx2_u32(int coproc, uint32_t n, uint32_t imm); uint32_t __arm_vcx2a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t imm); uint32_t __arm_vcx3_u32(int coproc, uint32_t n, uint32_t m, uint32_t imm); uint32_t __arm_vcx3a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm); uint64_t __arm_vcx1d_u64(int coproc, uint32_t imm); uint64_t __arm_vcx1da_u64(int coproc, uint64_t acc, uint32_t imm); uint64_t __arm_vcx2d_u64(int coproc, uint64_t m, uint32_t imm); uint64_t __arm_vcx2da_u64(int coproc, uint64_t acc, uint64_t m, uint32_t imm); uint64_t __arm_vcx3d_u64(int coproc, uint64_t n, uint64_t m, uint32_t imm); uint64_t __arm_vcx3da_u64(int coproc, uint64_t acc, uint64_t n, uint64_t m, uint32_t imm); Since the semantics of CDE instructions are opaque to the compiler, the ACLE intrinsics require dedicated LLVM IR intrinsics. The 64-bit and 32-bit variants share the same IR intrinsic. Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76298
This commit is contained in:
parent
7a85e3585e
commit
d22e661712
|
@ -13,6 +13,15 @@
|
||||||
|
|
||||||
include "arm_mve_defs.td"
|
include "arm_mve_defs.td"
|
||||||
|
|
||||||
|
// f64 is not defined in arm_mve_defs.td because MVE instructions only work with
|
||||||
|
// f16 and f32
|
||||||
|
def f64: PrimitiveType<"f", 64>;
|
||||||
|
|
||||||
|
// Float<t> expects t to be a scalar type, and expands to the floating-point
|
||||||
|
// type of the same width.
|
||||||
|
class Float<Type t>: ComplexType<(CTO_CopyKind t, f32)>;
|
||||||
|
def FScalar: Float<Scalar>;
|
||||||
|
|
||||||
// ACLE CDE intrinsic
|
// ACLE CDE intrinsic
|
||||||
class CDEIntrinsic<Type ret, dag args, dag codegen>
|
class CDEIntrinsic<Type ret, dag args, dag codegen>
|
||||||
: Intrinsic<ret, args, codegen> {
|
: Intrinsic<ret, args, codegen> {
|
||||||
|
@ -70,3 +79,31 @@ multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
|
||||||
defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
|
defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
|
||||||
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
|
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
|
||||||
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;
|
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;
|
||||||
|
|
||||||
|
// VCX* instructions operating on VFP registers
|
||||||
|
multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
|
||||||
|
defvar cp = (args imm_coproc:$cp);
|
||||||
|
let pnt = PNT_None, params = [u32] in {
|
||||||
|
def "" : CDEIntrinsic<u32, !con(cp, argsReg32, argsImm),
|
||||||
|
(bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
|
||||||
|
Scalar)>;
|
||||||
|
def a : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsReg32, argsImm),
|
||||||
|
(bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
|
||||||
|
(bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
|
||||||
|
}
|
||||||
|
let pnt = PNT_None, params = [u64] in {
|
||||||
|
def d : CDEIntrinsic<u64, !con(cp, argsReg64, argsImm),
|
||||||
|
(bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
|
||||||
|
Scalar)>;
|
||||||
|
def da : CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsReg64, argsImm),
|
||||||
|
(bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
|
||||||
|
(bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defm vcx1: CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
|
||||||
|
defm vcx2: CDE_VCXFP_m<(args imm_6b:$imm), (args u32:$n), (args u64:$n),
|
||||||
|
(? (bitcast $n, FScalar))>;
|
||||||
|
defm vcx3: CDE_VCXFP_m<(args imm_3b:$imm),
|
||||||
|
(args u32:$n, u32:$m), (args u64:$n, u64:$m),
|
||||||
|
(? (bitcast $n, FScalar), (bitcast $m, FScalar))>;
|
||||||
|
|
|
@ -0,0 +1,145 @@
|
||||||
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||||
|
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi \
|
||||||
|
// RUN: -target-feature +cdecp0 -target-feature +cdecp1 \
|
||||||
|
// RUN: -mfloat-abi hard -O0 -disable-O0-optnone \
|
||||||
|
// RUN: -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
||||||
|
|
||||||
|
#include <arm_cde.h>
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx1_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP1]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx1_u32(void) {
|
||||||
|
return __arm_vcx1_u32(0, 11);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx1a_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx1a.f32(i32 1, float [[TMP0]], i32 12)
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP2]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx1a_u32(uint32_t acc) {
|
||||||
|
return __arm_vcx1a_u32(1, acc, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx2_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx2.f32(i32 0, float [[TMP0]], i32 21)
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP2]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx2_u32(uint32_t n) {
|
||||||
|
return __arm_vcx2_u32(0, n, 21);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx2a_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx2a.f32(i32 0, float [[TMP0]], float [[TMP1]], i32 22)
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP3]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx2a_u32(uint32_t acc, uint32_t n) {
|
||||||
|
return __arm_vcx2a_u32(0, acc, n, 22);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx3_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[M:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx3.f32(i32 1, float [[TMP0]], float [[TMP1]], i32 3)
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP3]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx3_u32(uint32_t n, uint32_t m) {
|
||||||
|
return __arm_vcx3_u32(1, n, m, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx3a_u32(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[M:%.*]] to float
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.arm.cde.vcx3a.f32(i32 0, float [[TMP0]], float [[TMP1]], float [[TMP2]], i32 5)
|
||||||
|
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
|
||||||
|
// CHECK-NEXT: ret i32 [[TMP4]]
|
||||||
|
//
|
||||||
|
uint32_t test_vcx3a_u32(uint32_t acc, uint32_t n, uint32_t m) {
|
||||||
|
return __arm_vcx3a_u32(0, acc, n, m, 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx1d_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[TMP0]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP1]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx1d_u64(void) {
|
||||||
|
return __arm_vcx1d_u64(0, 11);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx1da_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx1a.f64(i32 1, double [[TMP0]], i32 12)
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP2]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx1da_u64(uint64_t acc) {
|
||||||
|
return __arm_vcx1da_u64(1, acc, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx2d_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx2.f64(i32 0, double [[TMP0]], i32 21)
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP2]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx2d_u64(uint64_t n) {
|
||||||
|
return __arm_vcx2d_u64(0, n, 21);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx2da_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx2a.f64(i32 0, double [[TMP0]], double [[TMP1]], i32 22)
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP3]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx2da_u64(uint64_t acc, uint64_t n) {
|
||||||
|
return __arm_vcx2da_u64(0, acc, n, 22);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx3d_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[M:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx3.f64(i32 1, double [[TMP0]], double [[TMP1]], i32 3)
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP3]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx3d_u64(uint64_t n, uint64_t m) {
|
||||||
|
return __arm_vcx3d_u64(1, n, m, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_vcx3da_u64(
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[M:%.*]] to double
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.arm.cde.vcx3a.f64(i32 0, double [[TMP0]], double [[TMP1]], double [[TMP2]], i32 5)
|
||||||
|
// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
|
||||||
|
// CHECK-NEXT: ret i64 [[TMP4]]
|
||||||
|
//
|
||||||
|
uint64_t test_vcx3da_u64(uint64_t acc, uint64_t n, uint64_t m) {
|
||||||
|
return __arm_vcx3da_u64(0, acc, n, m, 5);
|
||||||
|
}
|
|
@ -63,3 +63,43 @@ void test_cx(uint32_t a, uint64_t da, uint32_t n, uint32_t m) {
|
||||||
__arm_cx3da(0, da, n, m, a); // expected-error {{argument to '__arm_cx3da' must be a constant integer}}
|
__arm_cx3da(0, da, n, m, a); // expected-error {{argument to '__arm_cx3da' must be a constant integer}}
|
||||||
__arm_cx3da(0, da, n, m, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
__arm_cx3da(0, da, n, m, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_vcxfp_u32(uint32_t a, uint32_t n, uint32_t m) {
|
||||||
|
(void)__arm_vcx1_u32(0, 0);
|
||||||
|
__arm_vcx1_u32(0, a); // expected-error {{argument to '__arm_vcx1_u32' must be a constant integer}}
|
||||||
|
__arm_vcx1_u32(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
|
||||||
|
__arm_vcx1a_u32(0, a, a); // expected-error {{argument to '__arm_vcx1a_u32' must be a constant integer}}
|
||||||
|
__arm_vcx1a_u32(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
|
||||||
|
|
||||||
|
(void)__arm_vcx2_u32(0, n, 0);
|
||||||
|
__arm_vcx2_u32(0, n, a); // expected-error {{argument to '__arm_vcx2_u32' must be a constant integer}}
|
||||||
|
__arm_vcx2_u32(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
||||||
|
__arm_vcx2a_u32(0, a, n, a); // expected-error {{argument to '__arm_vcx2a_u32' must be a constant integer}}
|
||||||
|
__arm_vcx2a_u32(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
||||||
|
|
||||||
|
(void)__arm_vcx3_u32(0, n, m, 0);
|
||||||
|
__arm_vcx3_u32(0, n, m, a); // expected-error {{argument to '__arm_vcx3_u32' must be a constant integer}}
|
||||||
|
__arm_vcx3_u32(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
|
||||||
|
__arm_vcx3a_u32(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3a_u32' must be a constant integer}}
|
||||||
|
__arm_vcx3a_u32(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_vcxfp_u64(uint64_t a, uint64_t n, uint64_t m) {
|
||||||
|
(void)__arm_vcx1d_u64(0, 0);
|
||||||
|
__arm_vcx1d_u64(0, a); // expected-error {{argument to '__arm_vcx1d_u64' must be a constant integer}}
|
||||||
|
__arm_vcx1d_u64(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
|
||||||
|
__arm_vcx1da_u64(0, a, a); // expected-error {{argument to '__arm_vcx1da_u64' must be a constant integer}}
|
||||||
|
__arm_vcx1da_u64(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
|
||||||
|
|
||||||
|
(void)__arm_vcx2d_u64(0, n, 0);
|
||||||
|
__arm_vcx2d_u64(0, n, a); // expected-error {{argument to '__arm_vcx2d_u64' must be a constant integer}}
|
||||||
|
__arm_vcx2d_u64(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
||||||
|
__arm_vcx2da_u64(0, a, n, a); // expected-error {{argument to '__arm_vcx2da_u64' must be a constant integer}}
|
||||||
|
__arm_vcx2da_u64(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
|
||||||
|
|
||||||
|
(void)__arm_vcx3d_u64(0, n, m, 0);
|
||||||
|
__arm_vcx3d_u64(0, n, m, a); // expected-error {{argument to '__arm_vcx3d_u64' must be a constant integer}}
|
||||||
|
__arm_vcx3d_u64(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
|
||||||
|
__arm_vcx3da_u64(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3da_u64' must be a constant integer}}
|
||||||
|
__arm_vcx3da_u64(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
|
||||||
|
}
|
||||||
|
|
|
@ -1995,6 +1995,9 @@ void CdeEmitter::EmitHeader(raw_ostream &OS) {
|
||||||
const ScalarType *ST = kv.second.get();
|
const ScalarType *ST = kv.second.get();
|
||||||
if (ST->hasNonstandardName())
|
if (ST->hasNonstandardName())
|
||||||
continue;
|
continue;
|
||||||
|
// We don't have float64x2_t
|
||||||
|
if (ST->kind() == ScalarTypeKind::Float && ST->sizeInBits() == 64)
|
||||||
|
continue;
|
||||||
raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
|
raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
|
||||||
const VectorType *VT = getVectorType(ST);
|
const VectorType *VT = getVectorType(ST);
|
||||||
|
|
||||||
|
|
|
@ -1301,4 +1301,20 @@ defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>;
|
||||||
defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>;
|
defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>;
|
||||||
defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
|
defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
|
||||||
|
|
||||||
|
multiclass CDEVCXIntrinsics<list<LLVMType> args> {
|
||||||
|
def "" : Intrinsic<
|
||||||
|
[llvm_anyfloat_ty],
|
||||||
|
!listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
|
||||||
|
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 1)>]>;
|
||||||
|
def a : Intrinsic<
|
||||||
|
[llvm_anyfloat_ty],
|
||||||
|
!listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
|
||||||
|
args, [llvm_i32_ty /* imm */]),
|
||||||
|
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
|
||||||
|
defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
|
||||||
|
defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>;
|
||||||
|
|
||||||
} // end TargetPrefix
|
} // end TargetPrefix
|
||||||
|
|
|
@ -542,3 +542,42 @@ def CDE_VCX3_fpdp : CDE_VCX3_FP_Instr_D<"vcx3", cde_vcx_params_d_noacc>;
|
||||||
def CDE_VCX3A_fpdp : CDE_VCX3_FP_Instr_D<"vcx3a", cde_vcx_params_d_acc>;
|
def CDE_VCX3A_fpdp : CDE_VCX3_FP_Instr_D<"vcx3a", cde_vcx_params_d_acc>;
|
||||||
def CDE_VCX3_vec : CDE_VCX3_Vec_Instr<"vcx3", cde_vcx_params_q_noacc>;
|
def CDE_VCX3_vec : CDE_VCX3_Vec_Instr<"vcx3", cde_vcx_params_q_noacc>;
|
||||||
def CDE_VCX3A_vec : CDE_VCX3_Vec_Instr<"vcx3a", cde_vcx_params_q_acc>;
|
def CDE_VCX3A_vec : CDE_VCX3_Vec_Instr<"vcx3a", cde_vcx_params_q_acc>;
|
||||||
|
|
||||||
|
|
||||||
|
let Predicates = [HasCDE, HasFPRegs] in {
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
|
||||||
|
(f32 (CDE_VCX1_fpsp p_imm:$coproc, imm_11b:$imm))>;
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx1a timm:$coproc, (f32 SPR:$acc), timm:$imm)),
|
||||||
|
(f32 (CDE_VCX1A_fpsp p_imm:$coproc, SPR:$acc, imm_11b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
|
||||||
|
(f64 (CDE_VCX1_fpdp p_imm:$coproc, imm_11b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx1a timm:$coproc, (f64 DPR:$acc), timm:$imm)),
|
||||||
|
(f64 (CDE_VCX1A_fpdp p_imm:$coproc, DPR:$acc, imm_11b:$imm))>;
|
||||||
|
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx2 timm:$coproc, (f32 SPR:$n), timm:$imm)),
|
||||||
|
(f32 (CDE_VCX2_fpsp p_imm:$coproc, SPR:$n, imm_6b:$imm))>;
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx2a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
|
||||||
|
timm:$imm)),
|
||||||
|
(f32 (CDE_VCX2A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, imm_6b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx2 timm:$coproc, (f64 DPR:$n), timm:$imm)),
|
||||||
|
(f64 (CDE_VCX2_fpdp p_imm:$coproc, DPR:$n, imm_6b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx2a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
|
||||||
|
timm:$imm)),
|
||||||
|
(f64 (CDE_VCX2A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, imm_6b:$imm))>;
|
||||||
|
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx3 timm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
|
||||||
|
timm:$imm)),
|
||||||
|
(f32 (CDE_VCX3_fpsp p_imm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
|
||||||
|
imm_3b:$imm))>;
|
||||||
|
def : Pat<(f32 (int_arm_cde_vcx3a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
|
||||||
|
(f32 SPR:$m), timm:$imm)),
|
||||||
|
(f32 (CDE_VCX3A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, SPR:$m,
|
||||||
|
imm_3b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx3 timm:$coproc, (f64 DPR:$n), (f64 DPR:$m),
|
||||||
|
timm:$imm)),
|
||||||
|
(f64 (CDE_VCX3_fpdp p_imm:$coproc, DPR:$n, DPR:$m, imm_3b:$imm))>;
|
||||||
|
def : Pat<(f64 (int_arm_cde_vcx3a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
|
||||||
|
(f64 DPR:$m), timm:$imm)),
|
||||||
|
(f64 (CDE_VCX3A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, DPR:$m,
|
||||||
|
imm_3b:$imm))>;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,198 @@
|
||||||
|
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
|
||||||
|
; RUN: llc -mtriple=thumbv8m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+fp-armv8d16sp -verify-machineinstrs -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
declare float @llvm.arm.cde.vcx1.f32(i32 immarg, i32 immarg)
|
||||||
|
declare float @llvm.arm.cde.vcx1a.f32(i32 immarg, float, i32 immarg)
|
||||||
|
declare float @llvm.arm.cde.vcx2.f32(i32 immarg, float, i32 immarg)
|
||||||
|
declare float @llvm.arm.cde.vcx2a.f32(i32 immarg, float, float, i32 immarg)
|
||||||
|
declare float @llvm.arm.cde.vcx3.f32(i32 immarg, float, float, i32 immarg)
|
||||||
|
declare float @llvm.arm.cde.vcx3a.f32(i32 immarg, float, float, float, i32 immarg)
|
||||||
|
|
||||||
|
declare double @llvm.arm.cde.vcx1.f64(i32 immarg, i32 immarg)
|
||||||
|
declare double @llvm.arm.cde.vcx1a.f64(i32 immarg, double, i32 immarg)
|
||||||
|
declare double @llvm.arm.cde.vcx2.f64(i32 immarg, double, i32 immarg)
|
||||||
|
declare double @llvm.arm.cde.vcx2a.f64(i32 immarg, double, double, i32 immarg)
|
||||||
|
declare double @llvm.arm.cde.vcx3.f64(i32 immarg, double, double, i32 immarg)
|
||||||
|
declare double @llvm.arm.cde.vcx3a.f64(i32 immarg, double, double, double, i32 immarg)
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx1_u32() {
|
||||||
|
; CHECK-LABEL: test_vcx1_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vcx1 p0, s0, #11
|
||||||
|
; CHECK-NEXT: vmov r0, s0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
|
||||||
|
%1 = bitcast float %0 to i32
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx1a_u32(i32 %acc) {
|
||||||
|
; CHECK-LABEL: test_vcx1a_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov s0, r0
|
||||||
|
; CHECK-NEXT: vcx1a p1, s0, #12
|
||||||
|
; CHECK-NEXT: vmov r0, s0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32 %acc to float
|
||||||
|
%1 = call float @llvm.arm.cde.vcx1a.f32(i32 1, float %0, i32 12)
|
||||||
|
%2 = bitcast float %1 to i32
|
||||||
|
ret i32 %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx2_u32(i32 %n) {
|
||||||
|
; CHECK-LABEL: test_vcx2_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov s0, r0
|
||||||
|
; CHECK-NEXT: vcx2 p0, s0, s0, #21
|
||||||
|
; CHECK-NEXT: vmov r0, s0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32 %n to float
|
||||||
|
%1 = call float @llvm.arm.cde.vcx2.f32(i32 0, float %0, i32 21)
|
||||||
|
%2 = bitcast float %1 to i32
|
||||||
|
ret i32 %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx2a_u32(i32 %acc, i32 %n) {
|
||||||
|
; CHECK-LABEL: test_vcx2a_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov s0, r1
|
||||||
|
; CHECK-NEXT: vmov s2, r0
|
||||||
|
; CHECK-NEXT: vcx2a p0, s2, s0, #22
|
||||||
|
; CHECK-NEXT: vmov r0, s2
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32 %acc to float
|
||||||
|
%1 = bitcast i32 %n to float
|
||||||
|
%2 = call float @llvm.arm.cde.vcx2a.f32(i32 0, float %0, float %1, i32 22)
|
||||||
|
%3 = bitcast float %2 to i32
|
||||||
|
ret i32 %3
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx3_u32(i32 %n, i32 %m) {
|
||||||
|
; CHECK-LABEL: test_vcx3_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov s0, r1
|
||||||
|
; CHECK-NEXT: vmov s2, r0
|
||||||
|
; CHECK-NEXT: vcx3 p1, s0, s2, s0, #3
|
||||||
|
; CHECK-NEXT: vmov r0, s0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32 %n to float
|
||||||
|
%1 = bitcast i32 %m to float
|
||||||
|
%2 = call float @llvm.arm.cde.vcx3.f32(i32 1, float %0, float %1, i32 3)
|
||||||
|
%3 = bitcast float %2 to i32
|
||||||
|
ret i32 %3
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @test_vcx3a_u32(i32 %acc, i32 %n, i32 %m) {
|
||||||
|
; CHECK-LABEL: test_vcx3a_u32:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov s0, r2
|
||||||
|
; CHECK-NEXT: vmov s2, r1
|
||||||
|
; CHECK-NEXT: vmov s4, r0
|
||||||
|
; CHECK-NEXT: vcx3a p0, s4, s2, s0, #5
|
||||||
|
; CHECK-NEXT: vmov r0, s4
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32 %acc to float
|
||||||
|
%1 = bitcast i32 %n to float
|
||||||
|
%2 = bitcast i32 %m to float
|
||||||
|
%3 = call float @llvm.arm.cde.vcx3a.f32(i32 0, float %0, float %1, float %2, i32 5)
|
||||||
|
%4 = bitcast float %3 to i32
|
||||||
|
ret i32 %4
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx1d_u64() {
|
||||||
|
; CHECK-LABEL: test_vcx1d_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vcx1 p0, d0, #11
|
||||||
|
; CHECK-NEXT: vmov r0, r1, d0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
|
||||||
|
%1 = bitcast double %0 to i64
|
||||||
|
ret i64 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx1da_u64(i64 %acc) {
|
||||||
|
; CHECK-LABEL: test_vcx1da_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov d0, r0, r1
|
||||||
|
; CHECK-NEXT: vcx1a p1, d0, #12
|
||||||
|
; CHECK-NEXT: vmov r0, r1, d0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i64 %acc to double
|
||||||
|
%1 = call double @llvm.arm.cde.vcx1a.f64(i32 1, double %0, i32 12)
|
||||||
|
%2 = bitcast double %1 to i64
|
||||||
|
ret i64 %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx2d_u64(i64 %n) {
|
||||||
|
; CHECK-LABEL: test_vcx2d_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov d0, r0, r1
|
||||||
|
; CHECK-NEXT: vcx2 p0, d0, d0, #21
|
||||||
|
; CHECK-NEXT: vmov r0, r1, d0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i64 %n to double
|
||||||
|
%1 = call double @llvm.arm.cde.vcx2.f64(i32 0, double %0, i32 21)
|
||||||
|
%2 = bitcast double %1 to i64
|
||||||
|
ret i64 %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx2da_u64(i64 %acc, i64 %n) {
|
||||||
|
; CHECK-LABEL: test_vcx2da_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov d0, r2, r3
|
||||||
|
; CHECK-NEXT: vmov d1, r0, r1
|
||||||
|
; CHECK-NEXT: vcx2a p0, d1, d0, #22
|
||||||
|
; CHECK-NEXT: vmov r0, r1, d1
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i64 %acc to double
|
||||||
|
%1 = bitcast i64 %n to double
|
||||||
|
%2 = call double @llvm.arm.cde.vcx2a.f64(i32 0, double %0, double %1, i32 22)
|
||||||
|
%3 = bitcast double %2 to i64
|
||||||
|
ret i64 %3
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx3d_u64(i64 %n, i64 %m) {
|
||||||
|
; CHECK-LABEL: test_vcx3d_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov d0, r2, r3
|
||||||
|
; CHECK-NEXT: vmov d1, r0, r1
|
||||||
|
; CHECK-NEXT: vcx3 p1, d0, d1, d0, #3
|
||||||
|
; CHECK-NEXT: vmov r0, r1, d0
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i64 %n to double
|
||||||
|
%1 = bitcast i64 %m to double
|
||||||
|
%2 = call double @llvm.arm.cde.vcx3.f64(i32 1, double %0, double %1, i32 3)
|
||||||
|
%3 = bitcast double %2 to i64
|
||||||
|
ret i64 %3
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i64 @test_vcx3da_u64(i64 %acc, i64 %n, i64 %m) {
|
||||||
|
; CHECK-LABEL: test_vcx3da_u64:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: push {r7, lr}
|
||||||
|
; CHECK-NEXT: ldrd lr, r12, [sp, #8]
|
||||||
|
; CHECK-DAG: vmov [[D0:d.*]], r0, r1
|
||||||
|
; CHECK-DAG: vmov [[D1:d.*]], r2, r3
|
||||||
|
; CHECK-DAG: vmov [[D2:d.*]], lr, r12
|
||||||
|
; CHECK-NEXT: vcx3a p0, [[D0]], [[D1]], [[D2]], #5
|
||||||
|
; CHECK-NEXT: vmov r0, r1, [[D0]]
|
||||||
|
; CHECK-NEXT: pop {r7, pc}
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i64 %acc to double
|
||||||
|
%1 = bitcast i64 %n to double
|
||||||
|
%2 = bitcast i64 %m to double
|
||||||
|
%3 = call double @llvm.arm.cde.vcx3a.f64(i32 0, double %0, double %1, double %2, i32 5)
|
||||||
|
%4 = bitcast double %3 to i64
|
||||||
|
ret i64 %4
|
||||||
|
}
|
Loading…
Reference in New Issue