[X86] Auto upgrade VPCOM/VPCOMU intrinsics to generic integer comparisons

This causes a couple of changes in the upgrade tests: signed/unsigned eq/ne are equivalent, and we constant-fold the true/false codes. These changes match what we already do for avx512 cmp/ucmp.

Noticed while cleaning up vector integer comparison costs for PR40376.

llvm-svn: 351697
This commit is contained in:
Simon Pilgrim 2019-01-20 19:27:40 +00:00
parent 745fd9f547
commit e1143c1322
7 changed files with 113 additions and 200 deletions

View File

@ -1908,31 +1908,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
// XOP integer vector comparison intrinsics. Each record takes two vectors of
// the result type plus an i8 immediate, and is marked IntrNoMem.
// Variants without the 'u' infix, on v16i8 / v8i16 / v4i32 / v2i64.
def int_x86_xop_vpcomb :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomd :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
// 'u' variants with the same operand shapes, on v16i8 / v8i16 / v4i32 / v2i64.
def int_x86_xop_vpcomub :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomud :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbd : def int_x86_xop_vphaddbd :
GCCBuiltin<"__builtin_ia32_vphaddbd">, GCCBuiltin<"__builtin_ia32_vphaddbd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;

View File

@ -361,8 +361,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "xop.vpcmov.256" || // Added in 5.0 Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0 Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0 Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
F->arg_size() == 2) ||
Name.startswith("xop.vprot") || // Added in 8.0 Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0 Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0 Name.startswith("avx512.pror") || // Added in 8.0
@ -2038,8 +2037,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
else else
llvm_unreachable("Unknown suffix"); llvm_unreachable("Unknown suffix");
Name = Name.substr(9); // strip off "xop.vpcom"
unsigned Imm; unsigned Imm;
if (CI->getNumArgOperands() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
if (Name.startswith("lt")) if (Name.startswith("lt"))
Imm = 0; Imm = 0;
else if (Name.startswith("le")) else if (Name.startswith("le"))
@ -2058,6 +2060,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Imm = 7; Imm = 7;
else else
llvm_unreachable("Unknown condition"); llvm_unreachable("Unknown condition");
}
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
} else if (IsX86 && Name.startswith("xop.vpcmov")) { } else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2); Value *Sel = CI->getArgOperand(2);

View File

@ -1205,14 +1205,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP, X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
X86ISD::GF2P8MULB, 0), X86ISD::GF2P8MULB, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0), X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0), X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0), X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),

View File

@ -1133,45 +1133,6 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ShuffleMask); return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
} }
/// Rewrite an XOP VPCOM/VPCOMU intrinsic call as generic IR when its
/// condition-code operand (argument 2) is a ConstantInt.
///
/// Only the low three bits of the immediate are examined:
///   0..3 -> LT, LE, GT, GE (signed or unsigned according to \p IsSigned)
///   4, 5 -> EQ, NE
///   6    -> constant 0 of the result type (FALSE)
///   7    -> constant -1 of the result type (TRUE)
/// For codes 0..5 an icmp of operands 0 and 1 is emitted through \p Builder
/// and converted to the intrinsic's result type with CreateSExtOrTrunc.
///
/// \returns the replacement value, or nullptr if the condition code is not a
/// ConstantInt.
static Value *simplifyX86vpcom(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsSigned) {
  auto *CondCode = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CondCode)
    return nullptr;

  VectorType *ResultTy = cast<VectorType>(II.getType());

  // Pick the signed or unsigned flavour of an ordering predicate.
  auto SelectPred = [IsSigned](CmpInst::Predicate SignedPred,
                               CmpInst::Predicate UnsignedPred) {
    return IsSigned ? SignedPred : UnsignedPred;
  };

  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  switch (CondCode->getZExtValue() & 0x7) {
  case 0x0:
    Pred = SelectPred(ICmpInst::ICMP_SLT, ICmpInst::ICMP_ULT);
    break;
  case 0x1:
    Pred = SelectPred(ICmpInst::ICMP_SLE, ICmpInst::ICMP_ULE);
    break;
  case 0x2:
    Pred = SelectPred(ICmpInst::ICMP_SGT, ICmpInst::ICMP_UGT);
    break;
  case 0x3:
    Pred = SelectPred(ICmpInst::ICMP_SGE, ICmpInst::ICMP_UGE);
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return ConstantInt::getSigned(ResultTy, 0); // FALSE
  case 0x7:
    return ConstantInt::getSigned(ResultTy, -1); // TRUE
  }

  Value *Cmp =
      Builder.CreateICmp(Pred, II.getArgOperand(0), II.getArgOperand(1));
  return Cmp ? Builder.CreateSExtOrTrunc(Cmp, ResultTy) : nullptr;
}
static bool maskIsAllOneOrUndef(Value *Mask) { static bool maskIsAllOneOrUndef(Value *Mask) {
auto *ConstMask = dyn_cast<Constant>(Mask); auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask) if (!ConstMask)
@ -3167,22 +3128,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return nullptr; return nullptr;
break; break;
case Intrinsic::x86_xop_vpcomb:
case Intrinsic::x86_xop_vpcomd:
case Intrinsic::x86_xop_vpcomq:
case Intrinsic::x86_xop_vpcomw:
if (Value *V = simplifyX86vpcom(*II, Builder, true))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_xop_vpcomub:
case Intrinsic::x86_xop_vpcomud:
case Intrinsic::x86_xop_vpcomuq:
case Intrinsic::x86_xop_vpcomuw:
if (Value *V = simplifyX86vpcom(*II, Builder, false))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::ppc_altivec_vperm: case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating // Note that ppc_altivec_vperm has a big-endian bias, so when creating

View File

@ -74,12 +74,12 @@ define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomud: ; X32-LABEL: commute_fold_vpcomud:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomequd (%eax), %xmm0, %xmm0 ; X32-NEXT: vpcomeqd (%eax), %xmm0, %xmm0
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: commute_fold_vpcomud: ; X64-LABEL: commute_fold_vpcomud:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: vpcomequd (%rdi), %xmm0, %xmm0 ; X64-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0
; X64-NEXT: retq ; X64-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a0 %1 = load <4 x i32>, <4 x i32>* %a0
%2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
@ -91,12 +91,12 @@ define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomuq: ; X32-LABEL: commute_fold_vpcomuq:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomnequq (%eax), %xmm0, %xmm0 ; X32-NEXT: vpcomneqq (%eax), %xmm0, %xmm0
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: commute_fold_vpcomuq: ; X64-LABEL: commute_fold_vpcomuq:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: vpcomnequq (%rdi), %xmm0, %xmm0 ; X64-NEXT: vpcomneqq (%rdi), %xmm0, %xmm0
; X64-NEXT: retq ; X64-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %a0 %1 = load <2 x i64>, <2 x i64>* %a0
%2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
@ -107,13 +107,12 @@ declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readn
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) { define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomuw: ; X32-LABEL: commute_fold_vpcomuw:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vpcomfalseuw (%eax), %xmm0, %xmm0
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: commute_fold_vpcomuw: ; X64-LABEL: commute_fold_vpcomuw:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: vpcomfalseuw (%rdi), %xmm0, %xmm0 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq ; X64-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0 %1 = load <8 x i16>, <8 x i16>* %a0
%2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
@ -124,13 +123,12 @@ declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readn
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) { define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomw: ; X32-LABEL: commute_fold_vpcomw:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X32-NEXT: vpcomtruew (%eax), %xmm0, %xmm0
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: commute_fold_vpcomw: ; X64-LABEL: commute_fold_vpcomw:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: vpcomtruew (%rdi), %xmm0, %xmm0 ; X64-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT: retq ; X64-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0 %1 = load <8 x i16>, <8 x i16>* %a0
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew

View File

@ -726,6 +726,86 @@ define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
} }
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomb intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltb instruction.
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomw intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltw instruction.
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomd intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltd instruction.
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomq intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltq instruction.
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomub intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltub instruction.
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomub:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomuw intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltuw instruction.
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomud intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltud instruction.
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomud:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomuq intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltuq instruction.
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) { define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov: ; CHECK-LABEL: test_int_x86_xop_vpcmov:
; CHECK: # %bb.0: ; CHECK: # %bb.0:

View File

@ -663,84 +663,3 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
ret <8 x float> %res ret <8 x float> %res
} }
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomb intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltb instruction.
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomw intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltw instruction.
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomd intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltd instruction.
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomq intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltq instruction.
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomub intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltub instruction.
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomub:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomuw intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltuw instruction.
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomud intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltud instruction.
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomud:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
; Calls the three-operand llvm.x86.xop.vpcomuq intrinsic with condition code 0;
; the CHECK lines expect a single vpcomltuq instruction.
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone