[ARM][Clang] Removing lowering of half-precision FP arguments and returns from Clang's CodeGen

Summary:
On the process of moving the argument lowering handling for
half-precision floating point arguments and returns to the backend, this
patch removes the code that was responsible for handling the coercion of
those arguments in Clang's Codegen.

Reviewers: rjmccall, chill, ostannard, dnsampaio

Reviewed By: ostannard

Subscribers: stuij, kristof.beyls, dmgreen, danielkiss, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D81451
This commit is contained in:
Lucas Prates 2020-06-09 09:52:01 +01:00
parent 92ad6d57c2
commit ada4c9dc4a
14 changed files with 197 additions and 482 deletions

View File

@ -3124,20 +3124,6 @@ llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
return R;
}
// Emit code to clear the padding bits when returning or passing as an argument
// a 16-bit floating-point value.
llvm::Value *CodeGenFunction::EmitCMSEClearFP16(llvm::Value *Src) {
llvm::Type *RetTy = Src->getType();
assert(RetTy->isFloatTy() ||
(RetTy->isIntegerTy() && RetTy->getIntegerBitWidth() == 32));
if (RetTy->isFloatTy()) {
llvm::Value *T0 = Builder.CreateBitCast(Src, Builder.getIntNTy(32));
llvm::Value *T1 = Builder.CreateAnd(T0, 0xffff, "cmse.clear");
return Builder.CreateBitCast(T1, RetTy);
}
return Builder.CreateAnd(Src, 0xffff, "cmse.clear");
}
void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
bool EmitRetDbgLoc,
SourceLocation EndLoc) {
@ -3307,17 +3293,10 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) {
// For certain return types, clear padding bits, as they may reveal
// sensitive information.
const Type *RTy = RetTy.getCanonicalType().getTypePtr();
if (RTy->isFloat16Type() || RTy->isHalfType()) {
// 16-bit floating-point types are passed in a 32-bit integer or float,
// with unspecified upper bits.
RV = EmitCMSEClearFP16(RV);
} else {
// Small struct/union types are passed as integers.
auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
RV = EmitCMSEClearRecord(RV, ITy, RetTy);
}
// Small struct/union types are passed as integers.
auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
RV = EmitCMSEClearRecord(RV, ITy, RetTy);
}
EmitReturnValueCheck(RV);
Ret = Builder.CreateRet(RV);
@ -4620,17 +4599,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (CallInfo.isCmseNSCall()) {
// For certain parameter types, clear padding bits, as they may reveal
// sensitive information.
const Type *PTy = I->Ty.getCanonicalType().getTypePtr();
// 16-bit floating-point types are passed in a 32-bit integer or
// float, with unspecified upper bits.
if (PTy->isFloat16Type() || PTy->isHalfType()) {
Load = EmitCMSEClearFP16(Load);
} else {
// Small struct/union types are passed as integer arrays.
auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
}
// Small struct/union types are passed as integer arrays.
auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
}
IRCallArgs[FirstIRArg] = Load;
}

View File

@ -3923,7 +3923,6 @@ public:
QualType RTy);
llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy,
QualType RTy);
llvm::Value *EmitCMSEClearFP16(llvm::Value *V);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
unsigned LLVMIntrinsic,

View File

@ -6265,17 +6265,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
// _Float16 and __fp16 get passed as if it were an int or float, but
// with the top 16 bits unspecified. This is not done for OpenCL as it handles
// the half type natively, and does not need to interwork with AAPCS code.
if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
!getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
// __bf16 gets passed using the bfloat IR type, or using i32 but
// with the top 16 bits unspecified.
if (Ty->isBFloat16Type() && IsFloatABISoftFP) {
@ -6486,17 +6475,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
return coerceIllegalVector(RetTy);
}
// _Float16 and __fp16 get returned as if it were an int or float, but with
// the top 16 bits unspecified. This is not done for OpenCL as it handles the
// half type natively, and does not need to interwork with AAPCS code.
if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
!getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
// if we're using the softfp float abi, __bf16 get returned as if it were an
// int but with the top 16 bits unspecified.
if (RetTy->isBFloat16Type()) {

View File

@ -1,51 +1,33 @@
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=NATIVE
__fp16 g;
void t1(__fp16 a) { g = a; }
// SOFT: define void @t1(i32 [[PARAM:%.*]])
// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16
// HARD: define arm_aapcs_vfpcc void @t1(float [[PARAM:%.*]])
// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32
// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16
// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @g to i16*)
// SOFT: define void @t1(half [[PARAM:%.*]])
// HARD: define arm_aapcs_vfpcc void @t1(half [[PARAM:%.*]])
// NATIVE: define void @t1(half [[PARAM:%.*]])
// NATIVE: store half [[PARAM]], half* @g
// CHECK: store half [[PARAM]], half* @g
__fp16 t2() { return g; }
// SOFT: define i32 @t2()
// HARD: define arm_aapcs_vfpcc float @t2()
// SOFT: define half @t2()
// HARD: define arm_aapcs_vfpcc half @t2()
// NATIVE: define half @t2()
// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @g to i16*)
// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32
// SOFT: ret i32 [[ZEXT]]
// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float
// HARD: ret float [[BITCAST]]
// NATIVE: [[LOAD:%.*]] = load half, half* @g
// NATIVE: ret half [[LOAD]]
// CHECK: [[LOAD:%.*]] = load half, half* @g
// CHECK: ret half [[LOAD]]
_Float16 h;
void t3(_Float16 a) { h = a; }
// SOFT: define void @t3(i32 [[PARAM:%.*]])
// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16
// HARD: define arm_aapcs_vfpcc void @t3(float [[PARAM:%.*]])
// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32
// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16
// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @h to i16*)
// SOFT: define void @t3(half [[PARAM:%.*]])
// HARD: define arm_aapcs_vfpcc void @t3(half [[PARAM:%.*]])
// NATIVE: define void @t3(half [[PARAM:%.*]])
// NATIVE: store half [[PARAM]], half* @h
// CHECK: store half [[PARAM]], half* @h
_Float16 t4() { return h; }
// SOFT: define i32 @t4()
// HARD: define arm_aapcs_vfpcc float @t4()
// SOFT: define half @t4()
// HARD: define arm_aapcs_vfpcc half @t4()
// NATIVE: define half @t4()
// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @h to i16*)
// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32
// SOFT: ret i32 [[ZEXT]]
// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float
// HARD: ret float [[BITCAST]]
// NATIVE: [[LOAD:%.*]] = load half, half* @h
// NATIVE: ret half [[LOAD]]
// CHECK: [[LOAD:%.*]] = load half, half* @h
// CHECK: ret half [[LOAD]]

View File

@ -134,15 +134,12 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpeqq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
{
@ -433,18 +430,15 @@ mve_pred16_t test_vcmpeqq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpeqq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpeqq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
@ -732,15 +726,12 @@ mve_pred16_t test_vcmpneq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpneq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpneq_n_f16(float16x8_t a, float16_t b)
{
@ -1031,18 +1022,15 @@ mve_pred16_t test_vcmpneq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpneq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpneq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
@ -1330,15 +1318,12 @@ mve_pred16_t test_vcmpcsq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpgeq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpgeq_n_f16(float16x8_t a, float16_t b)
{
@ -1629,18 +1614,15 @@ mve_pred16_t test_vcmpcsq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpgeq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpgeq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
@ -1928,15 +1910,12 @@ mve_pred16_t test_vcmphiq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpgtq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpgtq_n_f16(float16x8_t a, float16_t b)
{
@ -2227,18 +2206,15 @@ mve_pred16_t test_vcmphiq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpgtq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpgtq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
@ -2478,15 +2454,12 @@ mve_pred16_t test_vcmpleq_s32(int32x4_t a, int32x4_t b)
// CHECK-LABEL: @test_vcmpleq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b)
{
@ -2666,18 +2639,15 @@ mve_pred16_t test_vcmpleq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpleq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
@ -2854,15 +2824,12 @@ mve_pred16_t test_vcmpltq_s32(int32x4_t a, int32x4_t b)
// CHECK-LABEL: @test_vcmpltq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b)
{
@ -3042,18 +3009,15 @@ mve_pred16_t test_vcmpltq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vcmpltq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
// CHECK-NEXT: ret i16 [[TMP7]]
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
// CHECK-NEXT: ret i16 [[TMP5]]
//
mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{

View File

@ -78,19 +78,12 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
// CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B:%.*]] = alloca half, align 2
// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
// CHECK-NEXT: store float [[B_COERCE:%.*]], float* [[TMP]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = bitcast half* [[B]] to i8*
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP0]], i8* align 4 [[TMP1]], i32 2, i1 false)
// CHECK-NEXT: [[B1:%.*]] = load half, half* [[B]], align 2
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
// CHECK-NEXT: ret i16 [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
{

View File

@ -6,10 +6,7 @@
// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: ret <8 x half> [[DOTSPLAT]]
//
@ -97,15 +94,12 @@ uint32x4_t test_vdupq_n_u32(uint32_t a)
// CHECK-LABEL: @test_vdupq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vdupq_m_n_f16(float16x8_t inactive, float16_t a, mve_pred16_t p)
{
@ -244,12 +238,9 @@ uint32x4_t test_vdupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
// CHECK-LABEL: @test_vdupq_x_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: ret <8 x half> [[DOTSPLAT]]
//

View File

@ -7,10 +7,7 @@
// CHECK-LABEL: @test_vgetq_lane_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x half> [[A:%.*]], i32 2
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
// CHECK-NEXT: [[TMP_0_INSERT_EXT:%.*]] = zext i16 [[TMP1]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP_0_INSERT_EXT]] to float
// CHECK-NEXT: ret float [[TMP2]]
// CHECK-NEXT: ret half [[TMP0]]
//
float16_t test_vgetq_lane_f16(float16x8_t a)
{
@ -149,11 +146,8 @@ uint8_t test_vgetq_lane_u8(uint8x16_t a)
// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x half> [[B:%.*]], half [[TMP1]], i32 4
// CHECK-NEXT: ret <8 x half> [[TMP2]]
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x half> [[B:%.*]], half [[A:%.*]], i32 4
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vsetq_lane_f16(float16_t a, float16x8_t b)
{

View File

@ -32,13 +32,10 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
// CHECK-LABEL: @test_vfmaq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#ifdef POLYMORPHIC
@ -65,13 +62,10 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
// CHECK-LABEL: @test_vfmasq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
#ifdef POLYMORPHIC
@ -319,7 +313,7 @@ uint32x4_t test_vmlasq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
// CHECK-LABEL: @test_vqdmlahq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]])
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
@ -333,7 +327,7 @@ int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
// CHECK-LABEL: @test_vqdmlahq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]])
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
@ -346,7 +340,7 @@ int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
// CHECK-LABEL: @test_vqdmlahq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
@ -401,7 +395,7 @@ int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t m2, int32_t add) {
// CHECK-LABEL: @test_vqrdmlahq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]])
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
@ -415,7 +409,7 @@ int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
// CHECK-LABEL: @test_vqrdmlahq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]])
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
@ -428,7 +422,7 @@ int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
// CHECK-LABEL: @test_vqrdmlahq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
@ -512,15 +506,12 @@ float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr
// CHECK-LABEL: @test_vfmaq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP3]])
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
@ -549,15 +540,12 @@ float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, mve_pr
// CHECK-LABEL: @test_vfmasq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]])
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
@ -621,7 +609,7 @@ float32x4_t test_vfmsq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) {
@ -637,7 +625,7 @@ int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p)
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) {
@ -652,7 +640,7 @@ int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) {
@ -668,7 +656,7 @@ int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t p) {
@ -684,7 +672,7 @@ uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, mve_pred16_t p) {
@ -699,7 +687,7 @@ uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c, mve_pred16
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmlaq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, mve_pred16_t p) {
@ -809,7 +797,7 @@ uint32x4_t test_vmlasq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, mve_pred1
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
int8x16_t test_vqdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) {
@ -825,7 +813,7 @@ int8x16_t test_vqdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
int16x8_t test_vqdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) {
@ -840,7 +828,7 @@ int16x8_t test_vqdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) {
@ -903,7 +891,7 @@ int32x4_t test_vqdmlashq_m_n_s32(int32x4_t m1, int32x4_t m2, int32_t add, mve_pr
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
int8x16_t test_vqrdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) {
@ -919,7 +907,7 @@ int8x16_t test_vqrdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
int16x8_t test_vqrdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) {
@ -934,7 +922,7 @@ int16x8_t test_vqrdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqrdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) {

View File

@ -114,13 +114,10 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vaddq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP2]]
// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
{
@ -187,15 +184,12 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vaddq_x_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> undef)
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vaddq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{

View File

@ -264,17 +264,8 @@ uint32_t test_vmaxavq_s32(uint32_t a, int32x4_t b) {
// CHECK-LABEL: @test_vminnmvq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmv.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.minnmv.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret half [[TMP0]]
//
float16_t test_vminnmvq_f16(float16_t a, float16x8_t b) {
#ifdef POLYMORPHIC
@ -299,17 +290,8 @@ float32_t test_vminnmvq_f32(float32_t a, float32x4_t b) {
// CHECK-LABEL: @test_vminnmavq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmav.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.minnmav.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret half [[TMP0]]
//
float16_t test_vminnmavq_f16(float16_t a, float16x8_t b) {
#ifdef POLYMORPHIC
@ -334,17 +316,8 @@ float32_t test_vminnmavq_f32(float32_t a, float32x4_t b) {
// CHECK-LABEL: @test_vmaxnmvq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmv.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.maxnmv.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret half [[TMP0]]
//
float16_t test_vmaxnmvq_f16(float16_t a, float16x8_t b) {
#ifdef POLYMORPHIC
@ -369,17 +342,8 @@ float32_t test_vmaxnmvq_f32(float32_t a, float32x4_t b) {
// CHECK-LABEL: @test_vmaxnmavq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmav.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.maxnmav.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret half [[TMP0]]
//
float16_t test_vmaxnmavq_f16(float16_t a, float16x8_t b) {
#ifdef POLYMORPHIC
@ -698,19 +662,10 @@ uint32_t test_vmaxavq_p_s32(uint32_t a, int32x4_t b, mve_pred16_t p) {
// CHECK-LABEL: @test_vminnmvq_p_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP7]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret half [[TMP2]]
//
float16_t test_vminnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
@ -737,19 +692,10 @@ float32_t test_vminnmvq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
// CHECK-LABEL: @test_vminnmavq_p_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP7]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret half [[TMP2]]
//
float16_t test_vminnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
@ -776,19 +722,10 @@ float32_t test_vminnmavq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
// CHECK-LABEL: @test_vmaxnmvq_p_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP7]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret half [[TMP2]]
//
float16_t test_vmaxnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
@ -815,19 +752,10 @@ float32_t test_vmaxnmvq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
// CHECK-LABEL: @test_vmaxnmavq_p_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
// CHECK-NEXT: ret float [[TMP7]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret half [[TMP2]]
//
float16_t test_vmaxnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC

View File

@ -308,15 +308,12 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre
// CHECK-LABEL: @test_vmulq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
{

View File

@ -114,13 +114,10 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vsubq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP2]]
// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
{
@ -187,15 +184,12 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vsubq_x_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> undef)
// CHECK-NEXT: ret <8 x half> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vsubq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{

View File

@ -1,59 +0,0 @@
// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \
// RUN: -fallow-half-arguments-and-returns %s -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-SOFT
// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \
// RUN: -fallow-half-arguments-and-returns %s -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-SOFT
// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \
// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-HARD
// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \
// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-HARD
__fp16 g0();
__attribute__((cmse_nonsecure_entry)) __fp16 f0() {
return g0();
}
// CHECK: define {{.*}}@f0()
// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-NOPT-SOFT: ret i32 %[[V1]]
// CHECK-OPT-SOFT: %[[V0:.*]] = tail call {{.*}} @g0
// CHECK-OPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-OPT-SOFT: ret i32 %[[V1]]
// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
// CHECK-NOPT-HARD: ret float %[[V2]]
// CHECK-OPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
// CHECK-OPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-OPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
// CHECK-OPT-HARD: ret float %[[V2]]
void __attribute__((cmse_nonsecure_call)) (*g1)(__fp16);
__fp16 x;
void f1() {
g1(x);
}
// CHECK: define {{.*}}@f1()
// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-NOPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
// CHECK-OPT-SOFT: %[[V1:.*]] = zext i16 {{.*}} to i32
// CHECK-OPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
// CHECK-NOPT-HARD: call {{.*}}(float %[[V2]])
// CHECK-OPT-HARD: %[[V0:.*]] = zext i16 {{.*}} to i32
// CHECK-OPT-HARD: %[[V1:.*]] = bitcast i32 %[[V0]] to float
// CHECK-OPT-HARD: call {{.*}}(float %[[V1]])