forked from OSchip/llvm-project
[X86] Remove masking from the 512-bit masked floating point add/sub/mul/div intrinsics. Use a select in IR instead.
llvm-svn: 334358
This commit is contained in:
parent
69d6418d60
commit
98a79934af
|
@ -3744,30 +3744,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
// Arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
|
||||
def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -216,22 +216,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
|
||||
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
|
||||
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
|
||||
Name == "avx512.mask.add.pd.128" || // Added in 4.0
|
||||
Name == "avx512.mask.add.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.add.ps.128" || // Added in 4.0
|
||||
Name == "avx512.mask.add.ps.256" || // Added in 4.0
|
||||
Name == "avx512.mask.div.pd.128" || // Added in 4.0
|
||||
Name == "avx512.mask.div.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.div.ps.128" || // Added in 4.0
|
||||
Name == "avx512.mask.div.ps.256" || // Added in 4.0
|
||||
Name == "avx512.mask.mul.pd.128" || // Added in 4.0
|
||||
Name == "avx512.mask.mul.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.mul.ps.128" || // Added in 4.0
|
||||
Name == "avx512.mask.mul.ps.256" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.pd.128" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.ps.128" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.ps.256" || // Added in 4.0
|
||||
Name == "avx512.mask.max.pd.128" || // Added in 5.0
|
||||
Name == "avx512.mask.max.pd.256" || // Added in 5.0
|
||||
Name == "avx512.mask.max.ps.128" || // Added in 5.0
|
||||
|
@ -278,6 +262,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
|
||||
Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.add.p") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.sub.p") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.mul.p") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.div.p") || // Added in 7.0
|
||||
Name == "sse.cvtsi2ss" || // Added in 7.0
|
||||
Name == "sse.cvtsi642ss" || // Added in 7.0
|
||||
Name == "sse2.cvtsi2sd" || // Added in 7.0
|
||||
|
@ -2213,20 +2201,68 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
|
||||
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
|
||||
if (Name.endswith(".512")) {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_add_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_add_pd_512;
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(4) });
|
||||
} else {
|
||||
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
}
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
|
||||
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
if (Name.endswith(".512")) {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_div_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_div_pd_512;
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(4) });
|
||||
} else {
|
||||
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
}
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
|
||||
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
if (Name.endswith(".512")) {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_mul_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_mul_pd_512;
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(4) });
|
||||
} else {
|
||||
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
}
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
|
||||
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
if (Name.endswith(".512")) {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_sub_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_sub_pd_512;
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(4) });
|
||||
} else {
|
||||
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
}
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
|
||||
|
|
|
@ -20312,9 +20312,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
switch(IntrData->Type) {
|
||||
case INTR_TYPE_1OP:
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
|
||||
case INTR_TYPE_2OP:
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case INTR_TYPE_2OP: {
|
||||
// We specify 2 possible opcodes for intrinsics with rounding modes.
|
||||
// First, we check if the intrinsic may have non-default rounding mode,
|
||||
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
|
||||
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
|
||||
if (IntrWithRoundingModeOpcode != 0) {
|
||||
SDValue Rnd = Op.getOperand(3);
|
||||
if (!isRoundModeCurDirection(Rnd)) {
|
||||
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), Rnd);
|
||||
}
|
||||
}
|
||||
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
}
|
||||
case INTR_TYPE_3OP:
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
|
|
|
@ -449,6 +449,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
|
@ -463,12 +465,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
|
||||
X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
|
||||
X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FADDS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
@ -658,10 +658,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::DBPSADBW, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
|
||||
X86ISD::DBPSADBW, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
|
||||
X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
|
||||
X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FDIVS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
@ -766,10 +762,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::FMINS, X86ISD::FMINS_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
|
||||
X86ISD::FMINS, X86ISD::FMINS_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FMULS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
@ -990,10 +982,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::FSQRTS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FSQRTS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FSUBS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
@ -1197,6 +1185,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mul_pd_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mul_ps_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
|
@ -1289,6 +1279,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
|
||||
X86_INTRINSIC_DATA(avx512_sub_pd_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
|
||||
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
|
||||
X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
|
||||
|
|
|
@ -1034,36 +1034,6 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Emit a select instruction and appropriate bitcasts to help simplify
|
||||
// masked intrinsics.
|
||||
static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
unsigned VWidth = Op0->getType()->getVectorNumElements();
|
||||
|
||||
// If the mask is all ones we don't need the select. But we need to check
|
||||
// only the bit thats will be used in case VWidth is less than 8.
|
||||
if (auto *C = dyn_cast<ConstantInt>(Mask))
|
||||
if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
|
||||
return Op0;
|
||||
|
||||
auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
|
||||
cast<IntegerType>(Mask->getType())->getBitWidth());
|
||||
Mask = Builder.CreateBitCast(Mask, MaskTy);
|
||||
|
||||
// If we have less than 8 elements, then the starting mask was an i8 and
|
||||
// we need to extract down to the right number of elements.
|
||||
if (VWidth < 8) {
|
||||
uint32_t Indices[4];
|
||||
for (unsigned i = 0; i != VWidth; ++i)
|
||||
Indices[i] = i;
|
||||
Mask = Builder.CreateShuffleVector(Mask, Mask,
|
||||
makeArrayRef(Indices, VWidth),
|
||||
"extract");
|
||||
}
|
||||
|
||||
return Builder.CreateSelect(Mask, Op0, Op1);
|
||||
}
|
||||
|
||||
static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
|
||||
Value *Arg0 = II.getArgOperand(0);
|
||||
Value *Arg1 = II.getArgOperand(1);
|
||||
|
@ -2341,17 +2311,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_avx512_mask_add_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_div_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_mul_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_sub_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_add_pd_512:
|
||||
case Intrinsic::x86_avx512_mask_div_pd_512:
|
||||
case Intrinsic::x86_avx512_mask_mul_pd_512:
|
||||
case Intrinsic::x86_avx512_mask_sub_pd_512:
|
||||
case Intrinsic::x86_avx512_add_ps_512:
|
||||
case Intrinsic::x86_avx512_div_ps_512:
|
||||
case Intrinsic::x86_avx512_mul_ps_512:
|
||||
case Intrinsic::x86_avx512_sub_ps_512:
|
||||
case Intrinsic::x86_avx512_add_pd_512:
|
||||
case Intrinsic::x86_avx512_div_pd_512:
|
||||
case Intrinsic::x86_avx512_mul_pd_512:
|
||||
case Intrinsic::x86_avx512_sub_pd_512:
|
||||
// If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
|
||||
// IR operations.
|
||||
if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
|
||||
if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
|
||||
if (R->getValue() == 4) {
|
||||
Value *Arg0 = II->getArgOperand(0);
|
||||
Value *Arg1 = II->getArgOperand(1);
|
||||
|
@ -2359,27 +2329,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
Value *V;
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: llvm_unreachable("Case stmts out of sync!");
|
||||
case Intrinsic::x86_avx512_mask_add_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_add_pd_512:
|
||||
case Intrinsic::x86_avx512_add_ps_512:
|
||||
case Intrinsic::x86_avx512_add_pd_512:
|
||||
V = Builder.CreateFAdd(Arg0, Arg1);
|
||||
break;
|
||||
case Intrinsic::x86_avx512_mask_sub_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_sub_pd_512:
|
||||
case Intrinsic::x86_avx512_sub_ps_512:
|
||||
case Intrinsic::x86_avx512_sub_pd_512:
|
||||
V = Builder.CreateFSub(Arg0, Arg1);
|
||||
break;
|
||||
case Intrinsic::x86_avx512_mask_mul_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_mul_pd_512:
|
||||
case Intrinsic::x86_avx512_mul_ps_512:
|
||||
case Intrinsic::x86_avx512_mul_pd_512:
|
||||
V = Builder.CreateFMul(Arg0, Arg1);
|
||||
break;
|
||||
case Intrinsic::x86_avx512_mask_div_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_div_pd_512:
|
||||
case Intrinsic::x86_avx512_div_ps_512:
|
||||
case Intrinsic::x86_avx512_div_pd_512:
|
||||
V = Builder.CreateFDiv(Arg0, Arg1);
|
||||
break;
|
||||
}
|
||||
|
||||
// Create a select for the masking.
|
||||
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
|
||||
Builder);
|
||||
return replaceInstUsesWith(*II, V);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6941,3 +6941,864 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16
|
|||
%res2 = add <16 x i32> %res, %res1
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vsubps_rn:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vsubps_rd:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vsubps_ru:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vsubps_rz:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vmulps_rn:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vmulps_rd:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vmulps_ru:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vmulps_rz:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
;; mask float
|
||||
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_rn:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_rn:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_rd:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_rd:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_ru:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_ru:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_rz:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_rz:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
;; With Passthru value
|
||||
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_passthru_rn:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_passthru_rn:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_passthru_rd:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_passthru_rd:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
; X86-LABEL: test_vmulps_mask_passthru_ru:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_vmulps_mask_passthru_ru:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; Masked vmulps with a passthru operand and round-toward-zero ({rz-sae},
; rounding-mode argument 3); merge-masking keeps %passthru lanes, hence the
; vmovaps of the blended destination back into %zmm0.
define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                                                            <16 x float> %passthru, i16 %mask, i32 3)
  ret <16 x float> %res
}
;; mask double
; Zero-masked ({z}) vmulpd covering all four static rounding modes; the final
; i32 argument selects the mode: 0={rn-sae}, 1={rd-sae}, 2={ru-sae}, 3={rz-sae}.
; The zeroinitializer passthru lowers to zeroing-masking, so no extra blend is
; emitted.
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rn:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                                                            <8 x double> zeroinitializer, i8 %mask, i32 0)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rd:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                                                            <8 x double> zeroinitializer, i8 %mask, i32 1)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_ru:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                                                            <8 x double> zeroinitializer, i8 %mask, i32 2)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                                                            <8 x double> zeroinitializer, i8 %mask, i32 3)
  ret <8 x double> %res
}
; Zero-masked vaddps for rounding modes 0-3 plus "current" (i32 4, dynamic
; rounding, no {sae} suffix in the emitted instruction).
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}
; Merge-masked vaddps (passthru %src) for rounding modes 0-3 plus "current"
; (i32 4). Merge-masking computes into %zmm2 over %src and moves the result
; back to %zmm0.
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}
; Unmasked vaddps (mask = -1, all lanes) for rounding modes 0-4; the all-ones
; mask folds away, so both targets emit the same instruction (shared CHECK
; prefix with ret{{[l|q]}}). The declare pins the legacy masked intrinsic
; signature exercised by this file.
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Merge-masked vsubps (passthru %src) for rounding modes 0-3 plus "current"
; (i32 4).
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}
; Unmasked vsubps (mask = -1) for rounding modes 0-4; shared CHECK prefix.
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
; Zero-masked vdivps for rounding modes 0-3 plus "current" (i32 4).
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}
; Merge-masked vdivps (passthru %src) for rounding modes 0-2; the rz-sae and
; later variants continue below.
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; X86-LABEL: test_mm512_mask_div_round_ps_current:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mm512_mask_div_round_ps_current:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
|
|
@ -1186,18 +1186,17 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
|
|||
|
||||
declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
|
||||
|
||||
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK-LABEL: test_vsubps_rn:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1205,9 +1204,8 @@ define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1215,9 +1213,8 @@ define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1225,9 +1222,8 @@ define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1235,9 +1231,8 @@ define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1245,9 +1240,8 @@ define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1255,9 +1249,8 @@ define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
|
||||
|
@ -1265,9 +1258,8 @@ define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
;; mask float
|
||||
|
@ -1277,9 +1269,10 @@ define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1288,9 +1281,10 @@ define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1299,9 +1293,10 @@ define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1310,9 +1305,10 @@ define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
;; With Passthru value
|
||||
|
@ -1323,9 +1319,10 @@ define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float>
|
|||
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
|
@ -1335,9 +1332,10 @@ define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float>
|
|||
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
|
@ -1347,9 +1345,10 @@ define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float>
|
|||
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
|
||||
|
@ -1359,9 +1358,10 @@ define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float>
|
|||
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float> %passthru, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
;; mask double
|
||||
|
@ -1371,9 +1371,10 @@ define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double> zeroinitializer, i8 %mask, i32 0)
|
||||
ret <8 x double> %res
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
|
||||
|
@ -1382,9 +1383,10 @@ define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double> zeroinitializer, i8 %mask, i32 1)
|
||||
ret <8 x double> %res
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
|
||||
|
@ -1393,9 +1395,10 @@ define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double> zeroinitializer, i8 %mask, i32 2)
|
||||
ret <8 x double> %res
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
|
||||
|
@ -1404,9 +1407,10 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
<8 x double> zeroinitializer, i8 %mask, i32 3)
|
||||
ret <8 x double> %res
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1415,26 +1419,34 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1443,19 +1455,22 @@ define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
|
@ -1465,9 +1480,12 @@ define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1475,9 +1493,12 @@ define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1485,8 +1506,10 @@ define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
|
@ -1496,11 +1519,12 @@ define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1508,34 +1532,37 @@ define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1543,8 +1570,8 @@ define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x floa
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1552,10 +1579,10 @@ define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x flo
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512..add.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
|
||||
|
@ -1564,9 +1591,12 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1574,9 +1604,12 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1584,8 +1617,10 @@ define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
|
@ -1595,11 +1630,12 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1607,8 +1643,10 @@ define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1616,24 +1654,26 @@ define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x floa
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1641,8 +1681,8 @@ define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x floa
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1650,8 +1690,8 @@ define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x flo
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1660,26 +1700,34 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1688,19 +1736,22 @@ define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
|
@ -1710,9 +1761,12 @@ define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1720,9 +1774,12 @@ define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1730,8 +1787,10 @@ define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
|
@ -1741,11 +1800,12 @@ define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x
|
|||
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
|
||||
; CHECK: ## %bb.0:
|
||||
|
@ -1753,34 +1813,37 @@ define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16
|
|||
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1788,8 +1851,8 @@ define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x floa
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
|
@ -1797,10 +1860,10 @@ define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x flo
|
|||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
|
||||
|
|
|
@ -2554,23 +2554,23 @@ define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passt
|
|||
ret <64 x i8> %3
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_add_ps(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_add_ps_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
|
@ -2581,36 +2581,42 @@ define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
|
||||
; CHECK-LABEL: @test_add_ps_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double>, <8 x double>, i32)
|
||||
|
||||
define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_add_pd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_add_pd_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
|
@ -2621,36 +2627,42 @@ define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
|
||||
; CHECK-LABEL: @test_add_pd_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_sub_ps(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_sub_ps_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
|
@ -2661,36 +2673,42 @@ define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
|
||||
; CHECK-LABEL: @test_sub_ps_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double>, <8 x double>, i32)
|
||||
|
||||
define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_sub_pd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_sub_pd_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
|
@ -2701,36 +2719,42 @@ define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
|
||||
; CHECK-LABEL: @test_sub_pd_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_mul_ps(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_mul_ps_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
|
@ -2741,36 +2765,42 @@ define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
|
||||
; CHECK-LABEL: @test_mul_ps_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
|
||||
|
||||
define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_mul_pd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_mul_pd_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
|
@ -2781,36 +2811,42 @@ define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
|
||||
; CHECK-LABEL: @test_mul_pd_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
|
||||
|
||||
define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_div_ps(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
|
||||
; CHECK-LABEL: @test_div_ps_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
|
@ -2821,36 +2857,42 @@ define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
|
||||
; CHECK-LABEL: @test_div_ps_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <16 x float> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
|
||||
ret <16 x float> %1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
|
||||
%2 = bitcast i16 %mask to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
|
||||
ret <16 x float> %3
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double>, <8 x double>, i32)
|
||||
|
||||
define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_div_pd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
|
||||
; CHECK-LABEL: @test_div_pd_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
|
@ -2861,17 +2903,23 @@ define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
|
||||
; CHECK-LABEL: @test_div_pd_mask_round(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
|
||||
; CHECK-NEXT: ret <8 x double> [[TMP3]]
|
||||
;
|
||||
%1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
|
||||
|
|
Loading…
Reference in New Issue