[X86] Rename the autoupgraded versions of the packed fp compare and fpclass intrinsics that don't take a mask as input to exclude '.mask.' from their name.

I think the intrinsics named 'avx512.mask.' should refer to the previous behavior of taking a mask argument in the intrinsic itself, as opposed to using a 'select' or 'and' instruction in IR to accomplish the masking. This is more consistent with the goal that eventually we will have no intrinsics that have masking built in. When we reach that goal, we should have no intrinsics named 'avx512.mask'.

llvm-svn: 335744
commit 31cbe75b3b
parent 65f35e4afc
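As a concrete illustration of the policy above, here is a minimal hand-written LLVM IR sketch (not part of the commit) of what the auto-upgrader does to a bitcode call of the legacy masked compare intrinsic. It assumes the old pre-7.0 signature, in which the result was an i16 bitmask and the mask was a trailing argument; the function names @old_style and @upgraded are hypothetical.

; Legacy form: the mask is an explicit i16 operand and the result is an i16 bitmask.
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

define i16 @old_style(<16 x float> %a, <16 x float> %b, i16 %mask) {
  %r = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 %mask, i32 4)
  ret i16 %r
}

; After auto-upgrade: the renamed intrinsic returns <16 x i1> and takes no mask.
; The masking becomes an ordinary 'and' on the i1 vector, then a bitcast back to i16.
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

define i16 @upgraded(<16 x float> %a, <16 x float> %b, i16 %mask) {
  %cmp = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 4)
  %m = bitcast i16 %mask to <16 x i1>
  %masked = and <16 x i1> %cmp, %m
  %r = bitcast <16 x i1> %masked to i16
  ret i16 %r
}

This mirrors the naming rule from the message: 'avx512.mask.' stays reserved for the old take-a-mask-argument forms, while the renamed 'avx512.cmp.' and 'avx512.fpclass.' forms leave masking to ordinary IR instructions.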
@@ -1250,22 +1250,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
                      llvm_v4i64_ty], [IntrNoMem]>;

-def int_x86_avx512_mask_fpclass_pd_128 :
+def int_x86_avx512_fpclass_pd_128 :
          Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty],
                    [IntrNoMem]>;
-def int_x86_avx512_mask_fpclass_pd_256 :
+def int_x86_avx512_fpclass_pd_256 :
          Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty],
                    [IntrNoMem]>;
-def int_x86_avx512_mask_fpclass_pd_512 :
+def int_x86_avx512_fpclass_pd_512 :
          Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty],
                    [IntrNoMem]>;
-def int_x86_avx512_mask_fpclass_ps_128 :
+def int_x86_avx512_fpclass_ps_128 :
          Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty],
                    [IntrNoMem]>;
-def int_x86_avx512_mask_fpclass_ps_256 :
+def int_x86_avx512_fpclass_ps_256 :
          Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty],
                    [IntrNoMem]>;
-def int_x86_avx512_mask_fpclass_ps_512 :
+def int_x86_avx512_fpclass_ps_512 :
          Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
                    [IntrNoMem]>;
 def int_x86_avx512_mask_fpclass_sd :

@@ -5517,22 +5517,22 @@ let TargetPrefix = "x86" in {
 let TargetPrefix = "x86" in {
   // NOTE: These comparison intrinsics are not used by clang as long as the
   //       distinction in signaling behaviour is not implemented.
-  def int_x86_avx512_mask_cmp_ps_512 :
+  def int_x86_avx512_cmp_ps_512 :
            Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_512 :
+  def int_x86_avx512_cmp_pd_512 :
            Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_ps_256 :
+  def int_x86_avx512_cmp_ps_256 :
            Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
                       llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_256 :
+  def int_x86_avx512_cmp_pd_256 :
            Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
                       llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_ps_128 :
+  def int_x86_avx512_cmp_ps_128 :
            Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                       llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_128 :
+  def int_x86_avx512_cmp_pd_128 :
            Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                       llvm_i32_ty], [IntrNoMem]>;
 }
@@ -65,19 +65,6 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
   return true;
 }

-// Upgrade the declaration of fp compare intrinsics that change return type
-// from scalar to vXi1 mask.
-static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
-                                      Function *&NewFn) {
-  // Check if the return type is a vector.
-  if (F->getReturnType()->isVectorTy())
-    return false;
-
-  rename(F);
-  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
-  return true;
-}
-
 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
   // All of the intrinsics matches below should be marked with which llvm
   // version started autoupgrading them. At some point in the future we would

@@ -220,6 +207,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
+      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0

@@ -272,6 +260,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
+      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
       Name == "sse.cvtsi2ss" || // Added in 7.0
       Name == "sse.cvtsi642ss" || // Added in 7.0
       Name == "sse2.cvtsi2sd" || // Added in 7.0

@@ -388,42 +377,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
   if (Name == "avx2.mpsadbw") // Added in 3.6
     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                             NewFn);
-  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
-                                     NewFn);
-  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
-                                     NewFn);
-  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
-                                     NewFn);
-  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
-                                     NewFn);
-  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
-                                     NewFn);
-  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.pd.128") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_128,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.pd.256") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_256,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.pd.512") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_512,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.ps.128") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_128,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.ps.256") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_256,
-                                     NewFn);
-  if (Name == "avx512.mask.fpclass.ps.512") // Added in 7.0
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_512,
-                                     NewFn);

   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
@@ -1013,8 +966,9 @@ static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
 }

 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
-static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
-                                     unsigned NumElts) {
+static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
+                                     Value *Mask) {
+  unsigned NumElts = Vec->getType()->getVectorNumElements();
   if (Mask) {
     const auto *C = dyn_cast<Constant>(Mask);
     if (!C || !C->isAllOnesValue())

@@ -1060,7 +1014,7 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,

   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

-  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
+  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
 }

 // Replace a masked intrinsic with an older unmasked intrinsic.

@@ -1530,8 +1484,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       ICmpInst::Predicate Pred =
         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
       Rep = Builder.CreateICmp(Pred, Rep, Zero);
-      unsigned NumElts = Op0->getType()->getVectorNumElements();
-      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
+      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
       unsigned NumElts =
         CI->getArgOperand(1)->getType()->getVectorNumElements();

@@ -1641,10 +1594,65 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
       bool CmpEq = Name[16] == 'e';
       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
-    } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
+      Type *OpTy = CI->getArgOperand(0)->getType();
+      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+      unsigned EltWidth = OpTy->getScalarSizeInBits();
+      Intrinsic::ID IID;
+      if (VecWidth == 128 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_fpclass_ps_128;
+      else if (VecWidth == 256 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_fpclass_ps_256;
+      else if (VecWidth == 512 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_fpclass_ps_512;
+      else if (VecWidth == 128 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_fpclass_pd_128;
+      else if (VecWidth == 256 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_fpclass_pd_256;
+      else if (VecWidth == 512 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_fpclass_pd_512;
+      else
+        llvm_unreachable("Unexpected intrinsic");
+
+      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                               { CI->getOperand(0), CI->getArgOperand(1) });
+      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+    } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
+      Type *OpTy = CI->getArgOperand(0)->getType();
+      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+      unsigned EltWidth = OpTy->getScalarSizeInBits();
+      Intrinsic::ID IID;
+      if (VecWidth == 128 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_cmp_ps_128;
+      else if (VecWidth == 256 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_cmp_ps_256;
+      else if (VecWidth == 512 && EltWidth == 32)
+        IID = Intrinsic::x86_avx512_cmp_ps_512;
+      else if (VecWidth == 128 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_cmp_pd_128;
+      else if (VecWidth == 256 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_cmp_pd_256;
+      else if (VecWidth == 512 && EltWidth == 64)
+        IID = Intrinsic::x86_avx512_cmp_pd_512;
+      else
+        llvm_unreachable("Unexpected intrinsic");
+
+      SmallVector<Value *, 4> Args;
+      Args.push_back(CI->getArgOperand(0));
+      Args.push_back(CI->getArgOperand(1));
+      Args.push_back(CI->getArgOperand(2));
+      if (CI->getNumArgOperands() == 5)
+        Args.push_back(CI->getArgOperand(4));
+
+      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                               Args);
+      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
+    } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
+               Name[16] != 'p') {
       // Integer compare intrinsics.
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
-    } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
@@ -1654,8 +1662,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Value *Op = CI->getArgOperand(0);
       Value *Zero = llvm::Constant::getNullValue(Op->getType());
       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
-      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr,
-                                   Op->getType()->getVectorNumElements());
+      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||

@@ -3115,59 +3122,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     break;
   }

-  case Intrinsic::x86_avx512_mask_cmp_pd_128:
-  case Intrinsic::x86_avx512_mask_cmp_pd_256:
-  case Intrinsic::x86_avx512_mask_cmp_pd_512:
-  case Intrinsic::x86_avx512_mask_cmp_ps_128:
-  case Intrinsic::x86_avx512_mask_cmp_ps_256:
-  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
-    SmallVector<Value *, 4> Args;
-    Args.push_back(CI->getArgOperand(0));
-    Args.push_back(CI->getArgOperand(1));
-    Args.push_back(CI->getArgOperand(2));
-    if (CI->getNumArgOperands() == 5)
-      Args.push_back(CI->getArgOperand(4));
-
-    NewCall = Builder.CreateCall(NewFn, Args);
-    unsigned NumElts = Args[0]->getType()->getVectorNumElements();
-    Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(3),
-                                        NumElts);
-
-    std::string Name = CI->getName();
-    if (!Name.empty()) {
-      CI->setName(Name + ".old");
-      NewCall->setName(Name);
-    }
-    CI->replaceAllUsesWith(Res);
-    CI->eraseFromParent();
-    return;
-  }
-
-  case Intrinsic::x86_avx512_mask_fpclass_pd_128:
-  case Intrinsic::x86_avx512_mask_fpclass_pd_256:
-  case Intrinsic::x86_avx512_mask_fpclass_pd_512:
-  case Intrinsic::x86_avx512_mask_fpclass_ps_128:
-  case Intrinsic::x86_avx512_mask_fpclass_ps_256:
-  case Intrinsic::x86_avx512_mask_fpclass_ps_512: {
-    SmallVector<Value *, 4> Args;
-    Args.push_back(CI->getArgOperand(0));
-    Args.push_back(CI->getArgOperand(1));
-
-    NewCall = Builder.CreateCall(NewFn, Args);
-    unsigned NumElts = Args[0]->getType()->getVectorNumElements();
-    Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(2),
-                                        NumElts);
-
-    std::string Name = CI->getName();
-    if (!Name.empty()) {
-      CI->setName(Name + ".old");
-      NewCall->setName(Name);
-    }
-    CI->replaceAllUsesWith(Res);
-    CI->eraseFromParent();
-    return;
-  }
-
   case Intrinsic::thread_pointer: {
     NewCall = Builder.CreateCall(NewFn, {});
     break;
@@ -376,6 +376,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
+  X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
+  X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
   X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),

@@ -397,18 +403,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
   X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FADDS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FADDS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
-                     X86ISD::CMPM_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
-                     X86ISD::CMPM_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
                      X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,

@@ -628,12 +632,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
@@ -2394,12 +2394,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return II;
     break;
   }
-  case Intrinsic::x86_avx512_mask_cmp_pd_128:
-  case Intrinsic::x86_avx512_mask_cmp_pd_256:
-  case Intrinsic::x86_avx512_mask_cmp_pd_512:
-  case Intrinsic::x86_avx512_mask_cmp_ps_128:
-  case Intrinsic::x86_avx512_mask_cmp_ps_256:
-  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
+  case Intrinsic::x86_avx512_cmp_pd_128:
+  case Intrinsic::x86_avx512_cmp_pd_256:
+  case Intrinsic::x86_avx512_cmp_pd_512:
+  case Intrinsic::x86_avx512_cmp_ps_128:
+  case Intrinsic::x86_avx512_cmp_ps_256:
+  case Intrinsic::x86_avx512_cmp_ps_512: {
     // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
     Value *Arg0 = II->getArgOperand(0);
     Value *Arg1 = II->getArgOperand(1);
@@ -22,13 +22,13 @@ define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: retq
 entry:
-  %0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
+  %0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
   %1 = bitcast <16 x i1> %0 to i16
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
   %3 = bitcast <16 x i1> %2 to i16
-  %4 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
+  %4 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
   %5 = bitcast <16 x i1> %4 to i16
-  %6 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
+  %6 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
   %7 = bitcast <16 x i1> %6 to i16
   %8 = bitcast i16 %1 to <16 x i1>
   %9 = bitcast i16 %3 to <16 x i1>

@@ -46,7 +46,7 @@ entry:
 }

 ; Function Attrs: nounwind readnone
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1

 attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -902,11 +902,11 @@ define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
   %1 = bitcast <16 x i1> %res to i16
   ret i16 %1
 }
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: test_cmppd:

@@ -916,11 +916,11 @@ define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
   %1 = bitcast <8 x i1> %res to i8
   ret i8 %1
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)

 ; Function Attrs: nounwind readnone

@@ -5121,9 +5121,9 @@ define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x
 ; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
 ; CHECK-NEXT: retq
 entry:
-  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
   %3 = bitcast <8 x i1> %2 to i8
   %conv = zext i8 %1 to i16
   %conv2 = zext i8 %3 to i16

@@ -5146,7 +5146,7 @@ define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8
 ; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
 ; CHECK-NEXT: retq
 entry:
-  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
   %conv = zext i8 %1 to i16
   %2 = bitcast i16 %conv to <16 x i1>
@@ -23,14 +23,14 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double>
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
   %1 = bitcast i8 %__U to <8 x i1>
   %2 = and <8 x i1> %0, %1
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

 define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_pd_mask:

@@ -41,7 +41,7 @@ define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
   ret i8 %1
 }

@@ -65,14 +65,14 @@ define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x floa
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+  %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
   %1 = bitcast i16 %__U to <16 x i1>
   %2 = and <16 x i1> %0, %1
   %3 = bitcast <16 x i1> %2 to i16
   ret i16 %3
 }

-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

 define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_ps_mask:

@@ -83,7 +83,7 @@ define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+  %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
   %1 = bitcast <16 x i1> %0 to i16
   ret i16 %1
 }
@@ -600,10 +600,10 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou
   ret <2 x double> %res4
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
+define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
 ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]

@@ -611,16 +611,16 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4)
-  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2)
+  %res = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 4)
+  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 2)
   %1 = and <8 x i1> %res1, %res
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

-define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
+define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
 ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]

@@ -628,8 +628,8 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4)
-  %res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2)
+  %res = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 4)
+  %res1 = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 2)
   %1 = and <16 x i1> %res1, %res
   %2 = bitcast <16 x i1> %1 to i16
   ret i16 %2
@@ -221,7 +221,7 @@ define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %_
 ; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   %2 = and <2 x i1> %0, %extract

@@ -230,7 +230,7 @@ entry:
   ret i8 %4
 }

-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

 define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_pd_mask:

@@ -240,7 +240,7 @@ define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
   %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2

@@ -265,7 +265,7 @@ define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double>
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = and <4 x i1> %0, %extract

@@ -274,7 +274,7 @@ entry:
   ret i8 %4
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

 define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_pd_mask:

@@ -285,7 +285,7 @@ define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
   %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2

@@ -308,7 +308,7 @@ define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__
 ; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = and <4 x i1> %0, %extract

@@ -317,7 +317,7 @@ entry:
   ret i8 %4
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

 define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_ps_mask:

@@ -327,7 +327,7 @@ define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
   %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2

@@ -352,14 +352,14 @@ define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float>
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %2 = and <8 x i1> %0, %1
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

 define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_ps_mask:

@@ -370,7 +370,7 @@ define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
   %1 = bitcast <8 x i1> %0 to i8
   ret i8 %1
 }
@@ -734,28 +734,28 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f
   ret <8 x float> %res2
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
+define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
 ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2)
-  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4)
+  %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
+  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
   %1 = and <4 x i1> %res1, %res
   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
+define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
 ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]

@@ -763,35 +763,35 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2)
-  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
+  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
   %1 = and <8 x i1> %res1, %res
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }

-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
+define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
 ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4)
-  %res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2)
+  %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
+  %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
   %1 = and <2 x i1> %res1, %res
   %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
+define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
 ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]

@@ -799,8 +799,8 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2)
-  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4)
+  %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
+  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
   %1 = and <4 x i1> %res1, %res
   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
@@ -746,11 +746,11 @@ define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
   %1 = bitcast <8 x i1> %res to i8
   ret i8 %1
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)

 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_cmpps_128:

@@ -759,12 +759,12 @@ define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)

 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: test_cmppd_256:

@@ -774,12 +774,12 @@ define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)

 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_cmppd_128:

@@ -788,12 +788,12 @@ define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
+  %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
   %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)

 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 ; X86-LABEL: test_mm512_maskz_max_ps_256:
@@ -19762,7 +19762,7 @@ entry:
 }


-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
 ; VLX: # %bb.0: # %entry

@@ -21229,7 +21229,7 @@ define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast <16 x i1> %2 to i16
   %4 = zext i16 %3 to i32
   ret i32 %4

@@ -21254,7 +21254,7 @@ define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u,
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast i16 %__u to <16 x i1>
   %4 = and <16 x i1> %2, %3
   %5 = bitcast <16 x i1> %4 to i16

@@ -21444,7 +21444,7 @@ define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast <16 x i1> %2 to i16
   %4 = zext i16 %3 to i64
   ret i64 %4

@@ -21469,7 +21469,7 @@ define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u,
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast i16 %__u to <16 x i1>
   %4 = and <16 x i1> %2, %3
   %5 = bitcast <16 x i1> %4 to i16

@@ -21479,7 +21479,7 @@ entry:



-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
 ; VLX: # %bb.0: # %entry

@@ -23345,7 +23345,7 @@ define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i16
   ret i16 %4

@@ -23374,7 +23374,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8

@@ -23560,7 +23560,7 @@ define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i32
   ret i32 %4

@@ -23587,7 +23587,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8

@@ -23780,7 +23780,7 @@ define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i64
   ret i64 %4

@@ -23807,7 +23807,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8

@@ -23830,7 +23830,7 @@ define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
 ; NoVLX-NEXT: kmovw %k0, %eax
 ; NoVLX-NEXT: vzeroupper
 ; NoVLX-NEXT: retq
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
   %1 = bitcast <16 x i1> %res to i16
   %cast = bitcast i16 %1 to <16 x i1>
   %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -140,21 +140,21 @@ define i8 @stack_fold_cmppd(<8 x double> %a0, <8 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd
 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
   %2 = bitcast <8 x i1> %res to i8
   ret i8 %2
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)

 define i16 @stack_fold_cmpps(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps
 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
   %2 = bitcast <16 x i1> %res to i16
   ret i16 %2
 }
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divsd_int
@@ -152,44 +152,44 @@ define i8 @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd
 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
+  %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
   %2 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)

 define i8 @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd_ymm
 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
   %2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)

 define i8 @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps
 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
   %2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)

 define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps_ymm
 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
   %2 = bitcast <8 x i1> %res to i8
   ret i8 %2
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)

 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divpd
@ -6,13 +6,13 @@
|
|||
define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPD128_safe(
|
||||
; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.safe = fsub <2 x double> %a, %b
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5)
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5)
|
||||
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -20,13 +20,13 @@ define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
|
|||
|
||||
define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPD128(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i = fsub ninf <2 x double> %a, %b
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5)
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5)
|
||||
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -34,13 +34,13 @@ define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
|
|||
|
||||
define i8 @sub_compare_foldingPD128_undef_elt(<2 x double> %a, <2 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPD128_undef_elt(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i = fsub ninf <2 x double> %a, %b
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i, <2 x double> <double 0.0, double undef>, i32 5)
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> <double 0.0, double undef>, i32 5)
|
||||
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -48,13 +48,13 @@ define i8 @sub_compare_foldingPD128_undef_elt(<2 x double> %a, <2 x double> %b){
|
|||
|
||||
define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPD256(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i1 = fsub ninf <4 x double> %a, %b
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
|
||||
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -62,25 +62,25 @@ define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
|
|||
|
||||
define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPD512(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T1]]
|
||||
;
|
||||
%sub.i2 = fsub ninf <8 x double> %a, %b
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4)
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4)
|
||||
%t1 = bitcast <8 x i1> %t0 to i8
|
||||
ret i8 %t1
|
||||
}
|
||||
|
||||
define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPS128(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i3 = fsub ninf <4 x float> %a, %b
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12)
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12)
|
||||
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -88,37 +88,37 @@ define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
|
|||
|
||||
define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPS256(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T1]]
|
||||
;
|
||||
%sub.i4 = fsub ninf <8 x float> %a, %b
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5)
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5)
|
||||
%t1 = bitcast <8 x i1> %t0 to i8
|
||||
ret i8 %t1
|
||||
}
|
||||
|
||||
define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_foldingPS512(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
|
||||
; CHECK-NEXT: ret i16 [[T1]]
|
||||
;
|
||||
%sub.i5 = fsub ninf <16 x float> %a, %b
|
||||
%t0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4)
|
||||
%t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4)
|
||||
%t1 = bitcast <16 x i1> %t0 to i16
|
||||
ret i16 %t1
|
||||
}
|
||||
|
||||
define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD128(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i = fsub ninf <2 x double> %a, %b
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5)
|
||||
%t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5)
|
||||
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -126,13 +126,13 @@ define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
|
|||
|
||||
define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD256(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i = fsub ninf <4 x double> %a, %b
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5)
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5)
|
||||
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -140,13 +140,13 @@ define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
|
|||
|
||||
define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
|
||||
; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5)
|
||||
; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i1> [[TMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T1]]
|
||||
;
|
||||
%sub.i1 = fsub ninf <4 x double> undef, undef
|
||||
%tmp = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
|
||||
%tmp = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
|
||||
%t0 = shufflevector <4 x i1> %tmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%t1 = bitcast <8 x i1> %t0 to i8
|
||||
ret i8 %t1
|
||||
|
@ -154,25 +154,25 @@ define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b)
|
|||
|
||||
define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD512(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T1]]
|
||||
;
|
||||
%sub.i = fsub ninf <8 x double> %a, %b
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4)
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4)
|
||||
%t1 = bitcast <8 x i1> %t0 to i8
|
||||
ret i8 %t1
|
||||
}
|
||||
|
||||
define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPS128(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12)
|
||||
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T2]]
|
||||
;
|
||||
%sub.i = fsub ninf <4 x float> %a, %b
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12)
|
||||
%t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12)
|
||||
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%t2 = bitcast <8 x i1> %t1 to i8
|
||||
ret i8 %t2
|
||||
|
@ -180,31 +180,31 @@ define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
|
|||
|
||||
define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPS256(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
|
||||
; CHECK-NEXT: ret i8 [[T1]]
|
||||
;
|
||||
%sub.i = fsub ninf <8 x float> %a, %b
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5)
|
||||
%t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5)
|
||||
%t1 = bitcast <8 x i1> %t0 to i8
|
||||
ret i8 %t1
|
||||
}
|
||||
|
||||
define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPS512(
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4)
|
||||
; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
|
||||
; CHECK-NEXT: ret i16 [[T1]]
|
||||
;
|
||||
%sub.i = fsub ninf <16 x float> %a, %b
|
||||
%t0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4)
|
||||
%t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4)
|
||||
%t1 = bitcast <16 x i1> %t0 to i16
|
||||
ret i16 %t1
|
||||
}
|
||||
|
||||
declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
|
||||
declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
|
||||
declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
|
||||
declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
|
||||
declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
|
||||
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
|
||||
declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
|
||||
declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
|
||||
declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
|
||||
declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
|
||||
declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
|
||||
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
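
As the paired declarations above show, the legacy '.mask.'-prefixed names and the renamed intrinsics have identical signatures, so upgrading old IR is purely a matter of rewriting the callee name. A minimal sketch of the mapping (editor's illustration with placeholder operands %x and %y, not taken from the tests):

; legacy name, as found in IR produced before the rename:
%m = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %x, <8 x double> %y, i32 0, i32 4)
; after autoupgrade, the same call targets the renamed intrinsic:
%m = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %x, <8 x double> %y, i32 0, i32 4)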