forked from OSchip/llvm-project
[X86] Remove llvm.x86.avx512.cvt*2mask.* intrinsics and autoupgrade to (icmp slt X, 0)
I had to drop fast-isel-abort from a test because FastISel can't select some of the mask operations. When we used intrinsics, we implicitly fell back to SelectionDAG for the intrinsic call without triggering the abort error, but with native IR that doesn't happen the same way. llvm-svn: 322050
This commit is contained in:
parent
7c2abdd249
commit
cc342d465e
|
@ -3801,35 +3801,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_cvtb2mask_128 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtb2mask_256 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtb2mask_512 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_cvtw2mask_128 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtw2mask_256 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtw2mask_512 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_cvtd2mask_128 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtd2mask_256 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtd2mask_512 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_cvtq2mask_128 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtq2mask_256 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_cvtq2mask_512 : // TODO remove this intrinsic
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>;
|
||||
|
||||
}
|
||||
|
||||
// Pack ops.
|
||||
|
|
|
@ -157,6 +157,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
|
||||
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
|
||||
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
|
||||
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
|
||||
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
|
||||
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
|
||||
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
|
||||
Name == "avx512.mask.add.pd.128" || // Added in 4.0
|
||||
Name == "avx512.mask.add.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.add.ps.128" || // Added in 4.0
|
||||
|
@ -829,9 +833,11 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
|
|||
// Apply the mask to a vector of i1's and make sure the result is at least 8 bits wide.
|
||||
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
|
||||
unsigned NumElts) {
|
||||
const auto *C = dyn_cast<Constant>(Mask);
|
||||
if (!C || !C->isAllOnesValue())
|
||||
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
|
||||
if (Mask) {
|
||||
const auto *C = dyn_cast<Constant>(Mask);
|
||||
if (!C || !C->isAllOnesValue())
|
||||
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
|
||||
}
|
||||
|
||||
if (NumElts < 8) {
|
||||
uint32_t Indices[8];
|
||||
|
@ -1115,6 +1121,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
} else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
|
||||
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
|
||||
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
|
||||
} else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
|
||||
Name.startswith("avx512.cvtw2mask.") ||
|
||||
Name.startswith("avx512.cvtd2mask.") ||
|
||||
Name.startswith("avx512.cvtq2mask."))) {
|
||||
Value *Op = CI->getArgOperand(0);
|
||||
Value *Zero = llvm::Constant::getNullValue(Op->getType());
|
||||
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
|
||||
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr,
|
||||
Op->getType()->getVectorNumElements());
|
||||
} else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
|
||||
Name == "ssse3.pabs.w.128" ||
|
||||
Name == "ssse3.pabs.d.128" ||
|
||||
|
|
|
@ -20518,19 +20518,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
Src1, Src2, Src3, Imm, Rnd),
|
||||
Mask, Passthru, Subtarget, DAG);
|
||||
}
|
||||
case CONVERT_TO_MASK: {
|
||||
MVT SrcVT = Op.getOperand(1).getSimpleValueType();
|
||||
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
|
||||
MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
|
||||
|
||||
SDValue CvtMask = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
|
||||
DAG.getConstant(0, dl, SrcVT),
|
||||
Op.getOperand(1));
|
||||
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
|
||||
DAG.getUNDEF(BitcastVT), CvtMask,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return DAG.getBitcast(Op.getValueType(), Res);
|
||||
}
|
||||
case ROUNDP: {
|
||||
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
|
||||
// Clear the upper bits of the rounding immediate so that the legacy
|
||||
|
|
|
@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t {
|
|||
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
|
||||
EXPAND_FROM_MEM,
|
||||
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
|
||||
FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP,
|
||||
FIXUPIMMS_MASKZ, GATHER_AVX2, MASK_BINOP,
|
||||
ROUNDP, ROUNDS
|
||||
};
|
||||
|
||||
|
@ -449,15 +449,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
|
||||
|
@ -472,9 +463,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx -fast-isel-abort=1 | FileCheck %s
|
||||
; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
|
||||
|
||||
; ModuleID = 'mask_set.c'
|
||||
source_filename = "mask_set.c"
|
||||
|
@ -17,51 +17,53 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna
|
|||
define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){
|
||||
; CHECK-LABEL: test_xmm:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: subq $56, %rsp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 64
|
||||
; CHECK-NEXT: movl $2, %esi
|
||||
; CHECK-NEXT: movl $8, %eax
|
||||
; CHECK-NEXT: subq $72, %rsp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 80
|
||||
; CHECK-NEXT: movl $4, %eax
|
||||
; CHECK-NEXT: vpmovw2m %xmm0, %k0
|
||||
; CHECK-NEXT: kmovd %k0, %edi
|
||||
; CHECK-NEXT: movb %dil, %r8b
|
||||
; CHECK-NEXT: movzbl %r8b, %edi
|
||||
; CHECK-NEXT: movw %di, %r9w
|
||||
; CHECK-NEXT: movl $2, %esi
|
||||
; CHECK-NEXT: movl $8, %edi
|
||||
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: movq %rdx, %rdi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d ## 4-byte Reload
|
||||
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
||||
; CHECK-NEXT: movl %eax, %edx
|
||||
; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill
|
||||
; CHECK-NEXT: movl %r8d, %edx
|
||||
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
||||
; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
|
||||
; CHECK-NEXT: callq _calc_expected_mask_val
|
||||
; CHECK-NEXT: movw %ax, %r9w
|
||||
; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
|
||||
; CHECK-NEXT: movzwl %r10w, %edi
|
||||
; CHECK-NEXT: movl %eax, %edx
|
||||
; CHECK-NEXT: movw %dx, %r9w
|
||||
; CHECK-NEXT: movzwl %r9w, %esi
|
||||
; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
|
||||
; CHECK-NEXT: kmovb %k0, %edi
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
||||
; CHECK-NEXT: callq _check_mask16
|
||||
; CHECK-NEXT: movl $4, %esi
|
||||
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload
|
||||
; CHECK-NEXT: vpmovd2m %xmm0, %k0
|
||||
; CHECK-NEXT: kmovd %k0, %edi
|
||||
; CHECK-NEXT: movb %dil, %r8b
|
||||
; CHECK-NEXT: movzbl %r8b, %edi
|
||||
; CHECK-NEXT: movw %di, %r9w
|
||||
; CHECK-NEXT: kmovq %k0, %k1
|
||||
; CHECK-NEXT: kmovd %k0, %esi
|
||||
; CHECK-NEXT: movb %sil, %r10b
|
||||
; CHECK-NEXT: movzbl %r10b, %esi
|
||||
; CHECK-NEXT: movw %si, %r9w
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload
|
||||
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ## 4-byte Reload
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
|
||||
; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill
|
||||
; CHECK-NEXT: callq _calc_expected_mask_val
|
||||
; CHECK-NEXT: movw %ax, %r9w
|
||||
; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
|
||||
; CHECK-NEXT: movzwl %r10w, %edi
|
||||
; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r11w ## 2-byte Reload
|
||||
; CHECK-NEXT: movzwl %r11w, %edi
|
||||
; CHECK-NEXT: movzwl %r9w, %esi
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
||||
; CHECK-NEXT: callq _check_mask16
|
||||
; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: addq $56, %rsp
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; CHECK-NEXT: addq $72, %rsp
|
||||
; CHECK-NEXT: retq
|
||||
%d2 = bitcast <2 x i64> %a to <8 x i16>
|
||||
%m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2)
|
||||
|
|
|
@ -3590,3 +3590,47 @@ define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32
|
|||
ret i32 %res2
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
|
||||
|
||||
define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: vzeroupper
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
|
||||
|
||||
define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: vzeroupper
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
|
||||
ret i32 %res
|
||||
}
|
||||
|
|
|
@ -1455,47 +1455,6 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8>
|
|||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
|
||||
|
||||
define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
|
||||
|
||||
define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
|
|
|
@ -3826,3 +3826,55 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16
|
|||
ret i16 %res2
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
|
||||
|
||||
define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
|
|
@ -2304,57 +2304,6 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
|
|||
ret <16 x i16> %res4
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
|
||||
|
||||
define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
|
|
|
@ -328,3 +328,28 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16
|
|||
ret <16 x i32> %res4
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
|
|
@ -438,29 +438,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1)
|
|||
%res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)
|
||||
|
||||
define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
|
|
@ -1804,3 +1804,54 @@ define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x
|
|||
ret <4 x i32> %res4
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
|
|
@ -624,55 +624,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
|
|||
%res2 = add i8 %res, %res1
|
||||
ret i8 %res2
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
|
||||
|
||||
define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
|
||||
ret i8 %res
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue