[ARM,CDE] Implement CDE S and D-register intrinsics

Summary:
This patch implements the following ACLE intrinsics:

  uint32_t __arm_vcx1_u32(int coproc, uint32_t imm);
  uint32_t __arm_vcx1a_u32(int coproc, uint32_t acc, uint32_t imm);
  uint32_t __arm_vcx2_u32(int coproc, uint32_t n, uint32_t imm);
  uint32_t __arm_vcx2a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
  uint32_t __arm_vcx3_u32(int coproc, uint32_t n, uint32_t m, uint32_t imm);
  uint32_t __arm_vcx3a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);

  uint64_t __arm_vcx1d_u64(int coproc, uint32_t imm);
  uint64_t __arm_vcx1da_u64(int coproc, uint64_t acc, uint32_t imm);
  uint64_t __arm_vcx2d_u64(int coproc, uint64_t m, uint32_t imm);
  uint64_t __arm_vcx2da_u64(int coproc, uint64_t acc, uint64_t m, uint32_t imm);
  uint64_t __arm_vcx3d_u64(int coproc, uint64_t n, uint64_t m, uint32_t imm);
  uint64_t __arm_vcx3da_u64(int coproc, uint64_t acc, uint64_t n, uint64_t m, uint32_t imm);

Since the semantics of CDE instructions are opaque to the compiler, the
ACLE intrinsics require dedicated LLVM IR intrinsics. The 64-bit and
32-bit variants share the same overloaded IR intrinsic, instantiated at
f64 and f32 respectively.

Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76298
This commit is contained in:
Mikhail Maltsev 2020-03-20 14:01:53 +00:00
parent 7a85e3585e
commit d22e661712
7 changed files with 478 additions and 0 deletions

View File

@ -13,6 +13,15 @@
include "arm_mve_defs.td"
// f64 is not defined in arm_mve_defs.td because MVE instructions only work with
// f16 and f32, so it is declared here for the D-register CDE intrinsics.
def f64: PrimitiveType<"f", 64>;
// Float<t> expects t to be a scalar type, and expands to the floating-point
// type of the same width.
// NOTE(review): presumably CTO_CopyKind keeps the size of its first operand
// and takes the kind (float) from the second - confirm in arm_mve_defs.td.
class Float<Type t>: ComplexType<(CTO_CopyKind t, f32)>;
// FScalar is Float applied to Scalar: the floating-point type as wide as the
// intrinsic's Scalar type. The VCX* definitions in this file bitcast their
// integer operands to FScalar before handing them to the IR intrinsic.
def FScalar: Float<Scalar>;
// ACLE CDE intrinsic
class CDEIntrinsic<Type ret, dag args, dag codegen>
: Intrinsic<ret, args, codegen> {
@ -70,3 +79,31 @@ multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;
// VCX* instructions operating on VFP registers
//
// Each instantiation defines four ACLE intrinsics: the 32-bit form (""), its
// accumulating form ("a"), and the 64-bit forms ("d" and "da").
//   argsImm   - the trailing immediate argument; the codegen refers to it as $imm
//   argsReg32 - register arguments of the 32-bit forms
//   argsReg64 - register arguments of the 64-bit forms
//   cgArgs    - codegen operands for the register arguments, shared by all forms
// Every form calls the IR intrinsic and bitcasts its result to Scalar.
multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
// The coprocessor number is the first argument of every intrinsic.
defvar cp = (args imm_coproc:$cp);
// 32-bit forms: u32 in and out, IR intrinsic instantiated at f32.
let pnt = PNT_None, params = [u32] in {
def "" : CDEIntrinsic<u32, !con(cp, argsReg32, argsImm),
(bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
Scalar)>;
// Accumulating form: $acc follows the coprocessor and is bitcast to FScalar.
def a : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsReg32, argsImm),
(bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
(bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
}
// 64-bit forms: u64 in and out. They use the same IR intrinsic names as the
// 32-bit forms (NAME and NAME # "a"), instantiated at f64 instead of f32.
let pnt = PNT_None, params = [u64] in {
def d : CDEIntrinsic<u64, !con(cp, argsReg64, argsImm),
(bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
Scalar)>;
def da : CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsReg64, argsImm),
(bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
(bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
}
}
// vcx1 takes no register operands, vcx2 takes $n, and vcx3 takes $n and $m.
// Register operands are bitcast to FScalar for the IR intrinsic call. The
// immediate type narrows as register operands are added: imm_11b, imm_6b,
// imm_3b.
defm vcx1: CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
defm vcx2: CDE_VCXFP_m<(args imm_6b:$imm), (args u32:$n), (args u64:$n),
(? (bitcast $n, FScalar))>;
defm vcx3: CDE_VCXFP_m<(args imm_3b:$imm),
(args u32:$n, u32:$m), (args u64:$n, u64:$m),
(? (bitcast $n, FScalar), (bitcast $m, FScalar))>;

View File

@ -0,0 +1,145 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi \
// RUN: -target-feature +cdecp0 -target-feature +cdecp1 \
// RUN: -mfloat-abi hard -O0 -disable-O0-optnone \
// RUN: -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_cde.h>
// CHECK-LABEL: @test_vcx1_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
// CHECK-NEXT: ret i32 [[TMP1]]
//
// 32-bit VCX1: coprocessor 0, immediate 11, no register operands.
uint32_t test_vcx1_u32(void) {
return __arm_vcx1_u32(0, 11);
}
// CHECK-LABEL: @test_vcx1a_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx1a.f32(i32 1, float [[TMP0]], i32 12)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
// CHECK-NEXT: ret i32 [[TMP2]]
//
// 32-bit VCX1A (accumulating): coprocessor 1, immediate 12.
uint32_t test_vcx1a_u32(uint32_t acc) {
return __arm_vcx1a_u32(1, acc, 12);
}
// CHECK-LABEL: @test_vcx2_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx2.f32(i32 0, float [[TMP0]], i32 21)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
// CHECK-NEXT: ret i32 [[TMP2]]
//
// 32-bit VCX2: one register operand, coprocessor 0, immediate 21.
uint32_t test_vcx2_u32(uint32_t n) {
return __arm_vcx2_u32(0, n, 21);
}
// CHECK-LABEL: @test_vcx2a_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx2a.f32(i32 0, float [[TMP0]], float [[TMP1]], i32 22)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
// 32-bit VCX2A: accumulator plus one register operand, coprocessor 0,
// immediate 22.
uint32_t test_vcx2a_u32(uint32_t acc, uint32_t n) {
return __arm_vcx2a_u32(0, acc, n, 22);
}
// CHECK-LABEL: @test_vcx3_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[M:%.*]] to float
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx3.f32(i32 1, float [[TMP0]], float [[TMP1]], i32 3)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
// 32-bit VCX3: two register operands, coprocessor 1, immediate 3.
uint32_t test_vcx3_u32(uint32_t n, uint32_t m) {
return __arm_vcx3_u32(1, n, m, 3);
}
// CHECK-LABEL: @test_vcx3a_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[M:%.*]] to float
// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.arm.cde.vcx3a.f32(i32 0, float [[TMP0]], float [[TMP1]], float [[TMP2]], i32 5)
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
// CHECK-NEXT: ret i32 [[TMP4]]
//
// 32-bit VCX3A: accumulator plus two register operands, coprocessor 0,
// immediate 5.
uint32_t test_vcx3a_u32(uint32_t acc, uint32_t n, uint32_t m) {
return __arm_vcx3a_u32(0, acc, n, m, 5);
}
// CHECK-LABEL: @test_vcx1d_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[TMP0]] to i64
// CHECK-NEXT: ret i64 [[TMP1]]
//
// 64-bit VCX1: coprocessor 0, immediate 11, no register operands.
uint64_t test_vcx1d_u64(void) {
return __arm_vcx1d_u64(0, 11);
}
// CHECK-LABEL: @test_vcx1da_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx1a.f64(i32 1, double [[TMP0]], i32 12)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
// CHECK-NEXT: ret i64 [[TMP2]]
//
// 64-bit VCX1A (accumulating): coprocessor 1, immediate 12.
uint64_t test_vcx1da_u64(uint64_t acc) {
return __arm_vcx1da_u64(1, acc, 12);
}
// CHECK-LABEL: @test_vcx2d_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx2.f64(i32 0, double [[TMP0]], i32 21)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
// CHECK-NEXT: ret i64 [[TMP2]]
//
// 64-bit VCX2: one register operand, coprocessor 0, immediate 21.
uint64_t test_vcx2d_u64(uint64_t n) {
return __arm_vcx2d_u64(0, n, 21);
}
// CHECK-LABEL: @test_vcx2da_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx2a.f64(i32 0, double [[TMP0]], double [[TMP1]], i32 22)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
// CHECK-NEXT: ret i64 [[TMP3]]
//
// 64-bit VCX2A: accumulator plus one register operand, coprocessor 0,
// immediate 22.
uint64_t test_vcx2da_u64(uint64_t acc, uint64_t n) {
return __arm_vcx2da_u64(0, acc, n, 22);
}
// CHECK-LABEL: @test_vcx3d_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[M:%.*]] to double
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx3.f64(i32 1, double [[TMP0]], double [[TMP1]], i32 3)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
// CHECK-NEXT: ret i64 [[TMP3]]
//
// 64-bit VCX3: two register operands, coprocessor 1, immediate 3.
uint64_t test_vcx3d_u64(uint64_t n, uint64_t m) {
return __arm_vcx3d_u64(1, n, m, 3);
}
// CHECK-LABEL: @test_vcx3da_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[M:%.*]] to double
// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.arm.cde.vcx3a.f64(i32 0, double [[TMP0]], double [[TMP1]], double [[TMP2]], i32 5)
// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
// CHECK-NEXT: ret i64 [[TMP4]]
//
// 64-bit VCX3A: accumulator plus two register operands, coprocessor 0,
// immediate 5.
uint64_t test_vcx3da_u64(uint64_t acc, uint64_t n, uint64_t m) {
return __arm_vcx3da_u64(0, acc, n, m, 5);
}

View File

@ -63,3 +63,43 @@ void test_cx(uint32_t a, uint64_t da, uint32_t n, uint32_t m) {
__arm_cx3da(0, da, n, m, a); // expected-error {{argument to '__arm_cx3da' must be a constant integer}}
__arm_cx3da(0, da, n, m, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
}
// Immediate-argument diagnostics for the 32-bit VCX intrinsics. For each
// intrinsic a non-constant immediate must be rejected, and so must the first
// constant past the top of the range (2047 for vcx1/vcx1a, 63 for vcx2/vcx2a,
// 7 for vcx3/vcx3a). The (void) calls with immediate 0 carry no diagnostic
// annotation, i.e. they must be accepted.
void test_vcxfp_u32(uint32_t a, uint32_t n, uint32_t m) {
(void)__arm_vcx1_u32(0, 0);
__arm_vcx1_u32(0, a); // expected-error {{argument to '__arm_vcx1_u32' must be a constant integer}}
__arm_vcx1_u32(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
__arm_vcx1a_u32(0, a, a); // expected-error {{argument to '__arm_vcx1a_u32' must be a constant integer}}
__arm_vcx1a_u32(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
(void)__arm_vcx2_u32(0, n, 0);
__arm_vcx2_u32(0, n, a); // expected-error {{argument to '__arm_vcx2_u32' must be a constant integer}}
__arm_vcx2_u32(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
__arm_vcx2a_u32(0, a, n, a); // expected-error {{argument to '__arm_vcx2a_u32' must be a constant integer}}
__arm_vcx2a_u32(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
(void)__arm_vcx3_u32(0, n, m, 0);
__arm_vcx3_u32(0, n, m, a); // expected-error {{argument to '__arm_vcx3_u32' must be a constant integer}}
__arm_vcx3_u32(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
__arm_vcx3a_u32(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3a_u32' must be a constant integer}}
__arm_vcx3a_u32(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}
// Immediate-argument diagnostics for the 64-bit (D-register) VCX intrinsics.
// Same structure and same immediate ranges as the 32-bit test: 2047 for
// vcx1d/vcx1da, 63 for vcx2d/vcx2da, 7 for vcx3d/vcx3da.
void test_vcxfp_u64(uint64_t a, uint64_t n, uint64_t m) {
(void)__arm_vcx1d_u64(0, 0);
__arm_vcx1d_u64(0, a); // expected-error {{argument to '__arm_vcx1d_u64' must be a constant integer}}
__arm_vcx1d_u64(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
__arm_vcx1da_u64(0, a, a); // expected-error {{argument to '__arm_vcx1da_u64' must be a constant integer}}
__arm_vcx1da_u64(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
(void)__arm_vcx2d_u64(0, n, 0);
__arm_vcx2d_u64(0, n, a); // expected-error {{argument to '__arm_vcx2d_u64' must be a constant integer}}
__arm_vcx2d_u64(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
__arm_vcx2da_u64(0, a, n, a); // expected-error {{argument to '__arm_vcx2da_u64' must be a constant integer}}
__arm_vcx2da_u64(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
(void)__arm_vcx3d_u64(0, n, m, 0);
__arm_vcx3d_u64(0, n, m, a); // expected-error {{argument to '__arm_vcx3d_u64' must be a constant integer}}
__arm_vcx3d_u64(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
__arm_vcx3da_u64(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3da_u64' must be a constant integer}}
__arm_vcx3da_u64(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}

View File

@ -1995,6 +1995,9 @@ void CdeEmitter::EmitHeader(raw_ostream &OS) {
const ScalarType *ST = kv.second.get();
if (ST->hasNonstandardName())
continue;
// We don't have float64x2_t
if (ST->kind() == ScalarTypeKind::Float && ST->sizeInBits() == 64)
continue;
raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
const VectorType *VT = getVectorType(ST);

View File

@ -1301,4 +1301,20 @@ defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>;
defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>;
defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
// IR intrinsics for the CDE VCX* instructions on floating-point values.
// `args` lists the register operands that sit between the coprocessor number
// and the immediate. The result type is an overloaded floating-point type;
// the coprocessor number and the immediate are both marked ImmArg.
multiclass CDEVCXIntrinsics<list<LLVMType> args> {
// Non-accumulating form: (coproc, args..., imm). The immediate is the operand
// at index size(args) + 1.
def "" : Intrinsic<
[llvm_anyfloat_ty],
!listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 1)>]>;
// Accumulating form: (coproc, acc, args..., imm). The accumulator has the
// same type as the result, which moves the immediate to index size(args) + 2.
def a : Intrinsic<
[llvm_anyfloat_ty],
!listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
args, [llvm_i32_ty /* imm */]),
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
}
// vcx1 has no register operands, vcx2 has one and vcx3 has two; each register
// operand has the same type as the result.
defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>;
} // end TargetPrefix

View File

@ -542,3 +542,42 @@ def CDE_VCX3_fpdp : CDE_VCX3_FP_Instr_D<"vcx3", cde_vcx_params_d_noacc>;
def CDE_VCX3A_fpdp : CDE_VCX3_FP_Instr_D<"vcx3a", cde_vcx_params_d_acc>;
def CDE_VCX3_vec : CDE_VCX3_Vec_Instr<"vcx3", cde_vcx_params_q_noacc>;
def CDE_VCX3A_vec : CDE_VCX3_Vec_Instr<"vcx3a", cde_vcx_params_q_acc>;
// Instruction-selection patterns mapping the llvm.arm.cde.vcx* intrinsics onto
// the scalar CDE instructions. The f32 instantiations select the S-register
// (fpsp, SPR) instructions and the f64 instantiations select the D-register
// (fpdp, DPR) ones. The coprocessor number and the immediate arrive as target
// immediates (timm) and are passed through as p_imm / imm_Nb operands.
// Result operands are written without type annotations throughout; the types
// are already fixed by the source pattern.
let Predicates = [HasCDE, HasFPRegs] in {
  // VCX1 / VCX1A: no register inputs, 11-bit immediate.
  def : Pat<(f32 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
            (f32 (CDE_VCX1_fpsp p_imm:$coproc, imm_11b:$imm))>;
  def : Pat<(f32 (int_arm_cde_vcx1a timm:$coproc, (f32 SPR:$acc), timm:$imm)),
            (f32 (CDE_VCX1A_fpsp p_imm:$coproc, SPR:$acc, imm_11b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
            (f64 (CDE_VCX1_fpdp p_imm:$coproc, imm_11b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx1a timm:$coproc, (f64 DPR:$acc), timm:$imm)),
            (f64 (CDE_VCX1A_fpdp p_imm:$coproc, DPR:$acc, imm_11b:$imm))>;

  // VCX2 / VCX2A: one register input, 6-bit immediate.
  def : Pat<(f32 (int_arm_cde_vcx2 timm:$coproc, (f32 SPR:$n), timm:$imm)),
            (f32 (CDE_VCX2_fpsp p_imm:$coproc, SPR:$n, imm_6b:$imm))>;
  def : Pat<(f32 (int_arm_cde_vcx2a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
                                    timm:$imm)),
            (f32 (CDE_VCX2A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, imm_6b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx2 timm:$coproc, (f64 DPR:$n), timm:$imm)),
            (f64 (CDE_VCX2_fpdp p_imm:$coproc, DPR:$n, imm_6b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx2a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
                                    timm:$imm)),
            (f64 (CDE_VCX2A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, imm_6b:$imm))>;

  // VCX3 / VCX3A: two register inputs, 3-bit immediate.
  def : Pat<(f32 (int_arm_cde_vcx3 timm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
                                   timm:$imm)),
            (f32 (CDE_VCX3_fpsp p_imm:$coproc, SPR:$n, SPR:$m, imm_3b:$imm))>;
  def : Pat<(f32 (int_arm_cde_vcx3a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
                                    (f32 SPR:$m), timm:$imm)),
            (f32 (CDE_VCX3A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, SPR:$m,
                                 imm_3b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx3 timm:$coproc, (f64 DPR:$n), (f64 DPR:$m),
                                   timm:$imm)),
            (f64 (CDE_VCX3_fpdp p_imm:$coproc, DPR:$n, DPR:$m, imm_3b:$imm))>;
  def : Pat<(f64 (int_arm_cde_vcx3a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
                                    (f64 DPR:$m), timm:$imm)),
            (f64 (CDE_VCX3A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, DPR:$m,
                                 imm_3b:$imm))>;
}

View File

@ -0,0 +1,198 @@
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+fp-armv8d16sp -verify-machineinstrs -o - %s | FileCheck %s
declare float @llvm.arm.cde.vcx1.f32(i32 immarg, i32 immarg)
declare float @llvm.arm.cde.vcx1a.f32(i32 immarg, float, i32 immarg)
declare float @llvm.arm.cde.vcx2.f32(i32 immarg, float, i32 immarg)
declare float @llvm.arm.cde.vcx2a.f32(i32 immarg, float, float, i32 immarg)
declare float @llvm.arm.cde.vcx3.f32(i32 immarg, float, float, i32 immarg)
declare float @llvm.arm.cde.vcx3a.f32(i32 immarg, float, float, float, i32 immarg)
declare double @llvm.arm.cde.vcx1.f64(i32 immarg, i32 immarg)
declare double @llvm.arm.cde.vcx1a.f64(i32 immarg, double, i32 immarg)
declare double @llvm.arm.cde.vcx2.f64(i32 immarg, double, i32 immarg)
declare double @llvm.arm.cde.vcx2a.f64(i32 immarg, double, double, i32 immarg)
declare double @llvm.arm.cde.vcx3.f64(i32 immarg, double, double, i32 immarg)
declare double @llvm.arm.cde.vcx3a.f64(i32 immarg, double, double, double, i32 immarg)
; f32 vcx1 on coprocessor 0: the result is produced in an S register and moved
; to r0 for the i32 return.
define arm_aapcs_vfpcc i32 @test_vcx1_u32() {
; CHECK-LABEL: test_vcx1_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcx1 p0, s0, #11
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%0 = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
%1 = bitcast float %0 to i32
ret i32 %1
}
; f32 vcx1a on coprocessor 1: the accumulator is moved from r0 into an S
; register, which is then both the input and the output of the instruction.
define arm_aapcs_vfpcc i32 @test_vcx1a_u32(i32 %acc) {
; CHECK-LABEL: test_vcx1a_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcx1a p1, s0, #12
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i32 %acc to float
%1 = call float @llvm.arm.cde.vcx1a.f32(i32 1, float %0, i32 12)
%2 = bitcast float %1 to i32
ret i32 %2
}
; f32 vcx2 on coprocessor 0 with one register operand; the same S register is
; used for the source and the destination.
define arm_aapcs_vfpcc i32 @test_vcx2_u32(i32 %n) {
; CHECK-LABEL: test_vcx2_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcx2 p0, s0, s0, #21
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i32 %n to float
%1 = call float @llvm.arm.cde.vcx2.f32(i32 0, float %0, i32 21)
%2 = bitcast float %1 to i32
ret i32 %2
}
; f32 vcx2a on coprocessor 0: accumulator (from r0) and one register operand
; (from r1); the result is read back from the accumulator register.
define arm_aapcs_vfpcc i32 @test_vcx2a_u32(i32 %acc, i32 %n) {
; CHECK-LABEL: test_vcx2a_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcx2a p0, s2, s0, #22
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i32 %acc to float
%1 = bitcast i32 %n to float
%2 = call float @llvm.arm.cde.vcx2a.f32(i32 0, float %0, float %1, i32 22)
%3 = bitcast float %2 to i32
ret i32 %3
}
; f32 vcx3 on coprocessor 1 with two register operands (n from r0, m from r1).
define arm_aapcs_vfpcc i32 @test_vcx3_u32(i32 %n, i32 %m) {
; CHECK-LABEL: test_vcx3_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcx3 p1, s0, s2, s0, #3
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i32 %n to float
%1 = bitcast i32 %m to float
%2 = call float @llvm.arm.cde.vcx3.f32(i32 1, float %0, float %1, i32 3)
%3 = bitcast float %2 to i32
ret i32 %3
}
; f32 vcx3a on coprocessor 0: accumulator plus two register operands, each
; moved from a core register into its own S register.
define arm_aapcs_vfpcc i32 @test_vcx3a_u32(i32 %acc, i32 %n, i32 %m) {
; CHECK-LABEL: test_vcx3a_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov s0, r2
; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vmov s4, r0
; CHECK-NEXT: vcx3a p0, s4, s2, s0, #5
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i32 %acc to float
%1 = bitcast i32 %n to float
%2 = bitcast i32 %m to float
%3 = call float @llvm.arm.cde.vcx3a.f32(i32 0, float %0, float %1, float %2, i32 5)
%4 = bitcast float %3 to i32
ret i32 %4
}
; f64 vcx1 on coprocessor 0: the result is produced in a D register and moved
; to the r0/r1 pair for the i64 return.
define arm_aapcs_vfpcc i64 @test_vcx1d_u64() {
; CHECK-LABEL: test_vcx1d_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcx1 p0, d0, #11
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: bx lr
entry:
%0 = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
%1 = bitcast double %0 to i64
ret i64 %1
}
; f64 vcx1a on coprocessor 1: the accumulator is moved from r0/r1 into a D
; register, which is both the input and the output of the instruction.
define arm_aapcs_vfpcc i64 @test_vcx1da_u64(i64 %acc) {
; CHECK-LABEL: test_vcx1da_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: vcx1a p1, d0, #12
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i64 %acc to double
%1 = call double @llvm.arm.cde.vcx1a.f64(i32 1, double %0, i32 12)
%2 = bitcast double %1 to i64
ret i64 %2
}
; f64 vcx2 on coprocessor 0 with one register operand; the same D register is
; used for the source and the destination.
define arm_aapcs_vfpcc i64 @test_vcx2d_u64(i64 %n) {
; CHECK-LABEL: test_vcx2d_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: vcx2 p0, d0, d0, #21
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i64 %n to double
%1 = call double @llvm.arm.cde.vcx2.f64(i32 0, double %0, i32 21)
%2 = bitcast double %1 to i64
ret i64 %2
}
; f64 vcx2a on coprocessor 0: accumulator (from r0/r1) and one register
; operand (from r2/r3); the result is read back from the accumulator register.
define arm_aapcs_vfpcc i64 @test_vcx2da_u64(i64 %acc, i64 %n) {
; CHECK-LABEL: test_vcx2da_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d0, r2, r3
; CHECK-NEXT: vmov d1, r0, r1
; CHECK-NEXT: vcx2a p0, d1, d0, #22
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i64 %acc to double
%1 = bitcast i64 %n to double
%2 = call double @llvm.arm.cde.vcx2a.f64(i32 0, double %0, double %1, i32 22)
%3 = bitcast double %2 to i64
ret i64 %3
}
; f64 vcx3 on coprocessor 1 with two register operands (n from r0/r1, m from
; r2/r3).
define arm_aapcs_vfpcc i64 @test_vcx3d_u64(i64 %n, i64 %m) {
; CHECK-LABEL: test_vcx3d_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d0, r2, r3
; CHECK-NEXT: vmov d1, r0, r1
; CHECK-NEXT: vcx3 p1, d0, d1, d0, #3
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: bx lr
entry:
%0 = bitcast i64 %n to double
%1 = bitcast i64 %m to double
%2 = call double @llvm.arm.cde.vcx3.f64(i32 1, double %0, double %1, i32 3)
%3 = bitcast double %2 to i64
ret i64 %3
}
; f64 vcx3a on coprocessor 0: accumulator plus two register operands. The
; third i64 is loaded from the stack (ldrd from [sp, #8]) into lr/r12, hence
; the push/pop. The three vmovs are matched with CHECK-DAG and captured
; register names, so neither their order nor the D-register assignment is fixed.
define arm_aapcs_vfpcc i64 @test_vcx3da_u64(i64 %acc, i64 %n, i64 %m) {
; CHECK-LABEL: test_vcx3da_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldrd lr, r12, [sp, #8]
; CHECK-DAG: vmov [[D0:d.*]], r0, r1
; CHECK-DAG: vmov [[D1:d.*]], r2, r3
; CHECK-DAG: vmov [[D2:d.*]], lr, r12
; CHECK-NEXT: vcx3a p0, [[D0]], [[D1]], [[D2]], #5
; CHECK-NEXT: vmov r0, r1, [[D0]]
; CHECK-NEXT: pop {r7, pc}
entry:
%0 = bitcast i64 %acc to double
%1 = bitcast i64 %n to double
%2 = bitcast i64 %m to double
%3 = call double @llvm.arm.cde.vcx3a.f64(i32 0, double %0, double %1, double %2, i32 5)
%4 = bitcast double %3 to i64
ret i64 %4
}