forked from OSchip/llvm-project
[AVX512] Remove the masked vpcmpeq/vpcmpgt intrinsics and autoupgrade them to native icmps.
llvm-svn: 273240
This commit is contained in:
parent
1fb3fd9c8c
commit
0a0fb0fda1
|
@ -6963,31 +6963,6 @@ let TargetPrefix = "x86" in {
|
|||
def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_b_512 :
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_w_512 :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_d_512 :
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_q_512 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_pcmpgt_b_512:
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_w_512:
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_d_512:
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_q_512:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_cmp_b_512: GCCBuiltin<"__builtin_ia32_cmpb512_mask">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty,
|
||||
|
@ -7016,32 +6991,6 @@ let TargetPrefix = "x86" in {
|
|||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
// 256-bit
|
||||
def int_x86_avx512_mask_pcmpeq_b_256 :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_w_256 :
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_d_256 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_q_256 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_pcmpgt_b_256:
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_w_256:
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_d_256:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_q_256:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_cmp_b_256: GCCBuiltin<"__builtin_ia32_cmpb256_mask">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
@ -7069,32 +7018,6 @@ let TargetPrefix = "x86" in {
|
|||
llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
// 128-bit
|
||||
def int_x86_avx512_mask_pcmpeq_b_128 :
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_w_128 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_d_128 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_q_128 :
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_pcmpgt_b_128:
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_w_128:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_d_128:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpgt_q_128:
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_cmp_b_128: GCCBuiltin<"__builtin_ia32_cmpb128_mask">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
|
||||
llvm_i16_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -174,6 +174,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
Name.startswith("x86.sse2.pcmpgt.") ||
|
||||
Name.startswith("x86.avx2.pcmpeq.") ||
|
||||
Name.startswith("x86.avx2.pcmpgt.") ||
|
||||
Name.startswith("x86.avx512.mask.pcmpeq.") ||
|
||||
Name.startswith("x86.avx512.mask.pcmpgt.") ||
|
||||
Name == "x86.sse41.pmaxsb" ||
|
||||
Name == "x86.sse2.pmaxs.w" ||
|
||||
Name == "x86.sse41.pmaxsd" ||
|
||||
|
@ -541,6 +543,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
|
|||
return Builder.CreateSelect(Cmp, Op0, Op1);
|
||||
}
|
||||
|
||||
/// Replace a call to one of the old masked integer-compare intrinsics
/// (llvm.x86.avx512.mask.pcmpeq.* / llvm.x86.avx512.mask.pcmpgt.*) with a
/// native vector icmp whose result is returned as an integer bitmask.
///
/// \param Builder  IRBuilder used to emit the replacement instructions.
/// \param CI       The old intrinsic call. Operands 0 and 1 are the vectors
///                 being compared; operand 2 is the integer mask.
/// \param Pred     Integer predicate to compare the two vectors with.
/// \returns The compare result bitcast to an integer that is
///          max(NumElts, 8) bits wide.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();
  Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));

  // A constant all-ones mask enables every lane, so the AND can be skipped.
  Value *Mask = CI.getArgOperand(2);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  // The result integer is never narrower than 8 bits, so compares with fewer
  // than 8 lanes are widened to 8 lanes before the final bitcast.
  if (NumElts < 8) {
    uint32_t Indices[8];
    // Lanes [0, NumElts) keep the real compare results.
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    // Index NumElts selects element 0 of the second shuffle operand.
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts;
    // Pad from an all-false vector rather than undef so the unused high bits
    // of the returned mask are zero. This is meant to match the mask-register
    // result of the VPCMP* instructions the old intrinsics mapped to; undef
    // padding would let the optimizer pick arbitrary values for those bits.
    Cmp = Builder.CreateShuffleVector(
        Cmp, Constant::getNullValue(Cmp->getType()), Indices);
  }
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
}
|
||||
|
||||
/// Upgrade a call to an old intrinsic. All argument and return casting must be
|
||||
/// provided to seamlessly integrate with existing context.
|
||||
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
|
@ -567,6 +593,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
"pcmpgt");
|
||||
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
|
||||
} else if (Name.startswith("llvm.x86.avx512.mask.pcmpeq.")) {
|
||||
Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
|
||||
} else if (Name.startswith("llvm.x86.avx512.mask.pcmpgt.")) {
|
||||
Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
|
||||
} else if (Name == "llvm.x86.sse41.pmaxsb" ||
|
||||
Name == "llvm.x86.sse2.pmaxs.w" ||
|
||||
Name == "llvm.x86.sse41.pmaxsd" ||
|
||||
|
|
|
@ -948,30 +948,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::VBROADCAST, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_512, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::VBROADCAST, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_perm_df_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_perm_df_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_perm_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),
|
||||
|
|
|
@ -276,3 +276,95 @@ define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1,
|
|||
%res4 = add <16 x i32> %res3, %res2
|
||||
ret <16 x i32> %res4
|
||||
}
|
||||
|
||||
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
|
||||
|
|
|
@ -970,98 +970,6 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
|
|||
|
||||
declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
|
||||
|
||||
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
; CHECK-LABEL: test_cmp_d_512:
|
||||
; CHECK: ## BB#0:
|
||||
|
|
|
@ -271,3 +271,172 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1,
|
|||
%res4 = add <32 x i16> %res3, %res2
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
||||
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpeq_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpeq_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp0:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpeq_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovq %rdi, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp1:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpeq_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpeq_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpeq_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpgt_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpgt_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp2:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpgt_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovq %rdi, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp3:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpgt_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpgt_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpgt_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
|
||||
|
||||
|
|
|
@ -2,178 +2,6 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
|
||||
|
||||
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpeq_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpeq_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp0:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpeq_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovq %rdi, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp1:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
|
||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpeq_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpeq_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpeq_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpgt_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpgt_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp2:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpgt_b:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovq %rdi, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovq %k0, %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp3:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
|
||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: movl (%esp), %eax
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $12, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
|
||||
; AVX512BW-LABEL: test_pcmpgt_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_pcmpgt_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; AVX512BW-LABEL: test_mask_pcmpgt_w:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovd %k0, %eax
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
; AVX512BW-LABEL: test_cmp_b_512:
|
||||
; AVX512BW: ## BB#0:
|
||||
|
@ -205,7 +33,7 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|||
; AVX512F-32-LABEL: test_cmp_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $68, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp4:
|
||||
; AVX512F-32-NEXT: .Ltmp0:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
|
||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
|
@ -291,7 +119,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
|||
; AVX512F-32-LABEL: test_mask_cmp_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $68, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp5:
|
||||
; AVX512F-32-NEXT: .Ltmp1:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
|
@ -381,7 +209,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|||
; AVX512F-32-LABEL: test_ucmp_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $68, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp6:
|
||||
; AVX512F-32-NEXT: .Ltmp2:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
|
||||
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
|
@ -467,7 +295,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
|
|||
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $68, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp7:
|
||||
; AVX512F-32-NEXT: .Ltmp3:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
|
@ -2661,7 +2489,7 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
|
|||
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp8:
|
||||
; AVX512F-32-NEXT: .Ltmp4:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
|
@ -2687,7 +2515,7 @@ define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
|
|||
; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $12, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp9:
|
||||
; AVX512F-32-NEXT: .Ltmp5:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
|
@ -3149,7 +2977,7 @@ define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x
|
|||
; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $20, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp10:
|
||||
; AVX512F-32-NEXT: .Ltmp6:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
|
@ -3214,7 +3042,7 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %
|
|||
; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: subl $20, %esp
|
||||
; AVX512F-32-NEXT: .Ltmp11:
|
||||
; AVX512F-32-NEXT: .Ltmp7:
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
|
|
|
@ -266,3 +266,96 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1,
|
|||
%res4 = add <16 x i16> %res3, %res2
|
||||
ret <16 x i16> %res4
|
||||
}
|
||||
|
||||
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
|
||||
|
||||
|
|
|
@ -3,98 +3,6 @@
|
|||
|
||||
; 256-bit
|
||||
|
||||
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
|
||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK-LABEL: test_cmp_b_256:
|
||||
; CHECK: ## BB#0:
|
||||
|
|
|
@ -558,3 +558,187 @@ define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8
|
|||
%res4 = add <8 x i32> %res3, %res2
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
||||
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
|
||||
|
|
|
@ -1,100 +1,9 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
|
||||
|
||||
; 256-bit
|
||||
|
||||
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-LABEL: test_cmp_d_256:
|
||||
; CHECK: ## BB#0:
|
||||
|
@ -549,98 +458,6 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounw
|
|||
|
||||
; 128-bit
|
||||
|
||||
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpgt_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpgt_q_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: test_cmp_d_128:
|
||||
; CHECK: ## BB#0:
|
||||
|
@ -7955,9 +7772,9 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
|
||||
; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
|
@ -7988,9 +7805,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
|
||||
; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
|
|
Loading…
Reference in New Issue