forked from OSchip/llvm-project
[X86] Auto upgrade VPCOM/VPCOMU intrinsics to generic integer comparisons
This causes a couple of changes in the upgrade tests, as signed/unsigned eq/ne are equivalent and we constant fold the true/false codes. These changes are the same as what we already do for avx512 cmp/ucmp. Noticed while cleaning up vector integer comparison costs for PR40376. llvm-svn: 351697
This commit is contained in:
parent
745fd9f547
commit
e1143c1322
|
@ -1908,31 +1908,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
|
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
|
||||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_xop_vpcomb :
|
|
||||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomw :
|
|
||||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomd :
|
|
||||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomq :
|
|
||||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomub :
|
|
||||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomuw :
|
|
||||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomud :
|
|
||||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_xop_vpcomuq :
|
|
||||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
|
||||||
llvm_i8_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_xop_vphaddbd :
|
def int_x86_xop_vphaddbd :
|
||||||
GCCBuiltin<"__builtin_ia32_vphaddbd">,
|
GCCBuiltin<"__builtin_ia32_vphaddbd">,
|
||||||
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||||
|
|
|
@ -361,8 +361,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||||
Name == "xop.vpcmov.256" || // Added in 5.0
|
Name == "xop.vpcmov.256" || // Added in 5.0
|
||||||
Name.startswith("avx512.mask.move.s") || // Added in 4.0
|
Name.startswith("avx512.mask.move.s") || // Added in 4.0
|
||||||
Name.startswith("avx512.cvtmask2") || // Added in 5.0
|
Name.startswith("avx512.cvtmask2") || // Added in 5.0
|
||||||
(Name.startswith("xop.vpcom") && // Added in 3.2
|
Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
|
||||||
F->arg_size() == 2) ||
|
|
||||||
Name.startswith("xop.vprot") || // Added in 8.0
|
Name.startswith("xop.vprot") || // Added in 8.0
|
||||||
Name.startswith("avx512.prol") || // Added in 8.0
|
Name.startswith("avx512.prol") || // Added in 8.0
|
||||||
Name.startswith("avx512.pror") || // Added in 8.0
|
Name.startswith("avx512.pror") || // Added in 8.0
|
||||||
|
@ -2038,26 +2037,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||||
else
|
else
|
||||||
llvm_unreachable("Unknown suffix");
|
llvm_unreachable("Unknown suffix");
|
||||||
|
|
||||||
Name = Name.substr(9); // strip off "xop.vpcom"
|
|
||||||
unsigned Imm;
|
unsigned Imm;
|
||||||
if (Name.startswith("lt"))
|
if (CI->getNumArgOperands() == 3) {
|
||||||
Imm = 0;
|
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
|
||||||
else if (Name.startswith("le"))
|
} else {
|
||||||
Imm = 1;
|
Name = Name.substr(9); // strip off "xop.vpcom"
|
||||||
else if (Name.startswith("gt"))
|
if (Name.startswith("lt"))
|
||||||
Imm = 2;
|
Imm = 0;
|
||||||
else if (Name.startswith("ge"))
|
else if (Name.startswith("le"))
|
||||||
Imm = 3;
|
Imm = 1;
|
||||||
else if (Name.startswith("eq"))
|
else if (Name.startswith("gt"))
|
||||||
Imm = 4;
|
Imm = 2;
|
||||||
else if (Name.startswith("ne"))
|
else if (Name.startswith("ge"))
|
||||||
Imm = 5;
|
Imm = 3;
|
||||||
else if (Name.startswith("false"))
|
else if (Name.startswith("eq"))
|
||||||
Imm = 6;
|
Imm = 4;
|
||||||
else if (Name.startswith("true"))
|
else if (Name.startswith("ne"))
|
||||||
Imm = 7;
|
Imm = 5;
|
||||||
else
|
else if (Name.startswith("false"))
|
||||||
llvm_unreachable("Unknown condition");
|
Imm = 6;
|
||||||
|
else if (Name.startswith("true"))
|
||||||
|
Imm = 7;
|
||||||
|
else
|
||||||
|
llvm_unreachable("Unknown condition");
|
||||||
|
}
|
||||||
|
|
||||||
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
|
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
|
||||||
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
|
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
|
||||||
Value *Sel = CI->getArgOperand(2);
|
Value *Sel = CI->getArgOperand(2);
|
||||||
|
|
|
@ -1205,14 +1205,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
|
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
|
||||||
X86ISD::GF2P8MULB, 0),
|
X86ISD::GF2P8MULB, 0),
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
|
||||||
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||||
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||||
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||||
|
|
|
@ -1133,45 +1133,6 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
|
||||||
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
|
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode XOP integer vector comparison intrinsics.
|
|
||||||
static Value *simplifyX86vpcom(const IntrinsicInst &II,
|
|
||||||
InstCombiner::BuilderTy &Builder,
|
|
||||||
bool IsSigned) {
|
|
||||||
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
|
|
||||||
uint64_t Imm = CInt->getZExtValue() & 0x7;
|
|
||||||
VectorType *VecTy = cast<VectorType>(II.getType());
|
|
||||||
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
|
|
||||||
|
|
||||||
switch (Imm) {
|
|
||||||
case 0x0:
|
|
||||||
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
|
|
||||||
break;
|
|
||||||
case 0x1:
|
|
||||||
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
|
|
||||||
break;
|
|
||||||
case 0x2:
|
|
||||||
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
|
|
||||||
break;
|
|
||||||
case 0x3:
|
|
||||||
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
|
|
||||||
break;
|
|
||||||
case 0x4:
|
|
||||||
Pred = ICmpInst::ICMP_EQ; break;
|
|
||||||
case 0x5:
|
|
||||||
Pred = ICmpInst::ICMP_NE; break;
|
|
||||||
case 0x6:
|
|
||||||
return ConstantInt::getSigned(VecTy, 0); // FALSE
|
|
||||||
case 0x7:
|
|
||||||
return ConstantInt::getSigned(VecTy, -1); // TRUE
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
|
|
||||||
II.getArgOperand(1)))
|
|
||||||
return Builder.CreateSExtOrTrunc(Cmp, VecTy);
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool maskIsAllOneOrUndef(Value *Mask) {
|
static bool maskIsAllOneOrUndef(Value *Mask) {
|
||||||
auto *ConstMask = dyn_cast<Constant>(Mask);
|
auto *ConstMask = dyn_cast<Constant>(Mask);
|
||||||
if (!ConstMask)
|
if (!ConstMask)
|
||||||
|
@ -3167,22 +3128,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Intrinsic::x86_xop_vpcomb:
|
|
||||||
case Intrinsic::x86_xop_vpcomd:
|
|
||||||
case Intrinsic::x86_xop_vpcomq:
|
|
||||||
case Intrinsic::x86_xop_vpcomw:
|
|
||||||
if (Value *V = simplifyX86vpcom(*II, Builder, true))
|
|
||||||
return replaceInstUsesWith(*II, V);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Intrinsic::x86_xop_vpcomub:
|
|
||||||
case Intrinsic::x86_xop_vpcomud:
|
|
||||||
case Intrinsic::x86_xop_vpcomuq:
|
|
||||||
case Intrinsic::x86_xop_vpcomuw:
|
|
||||||
if (Value *V = simplifyX86vpcom(*II, Builder, false))
|
|
||||||
return replaceInstUsesWith(*II, V);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Intrinsic::ppc_altivec_vperm:
|
case Intrinsic::ppc_altivec_vperm:
|
||||||
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
||||||
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
|
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
|
||||||
|
|
|
@ -74,12 +74,12 @@ define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
|
||||||
; X32-LABEL: commute_fold_vpcomud:
|
; X32-LABEL: commute_fold_vpcomud:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vpcomequd (%eax), %xmm0, %xmm0
|
; X32-NEXT: vpcomeqd (%eax), %xmm0, %xmm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: commute_fold_vpcomud:
|
; X64-LABEL: commute_fold_vpcomud:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vpcomequd (%rdi), %xmm0, %xmm0
|
; X64-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = load <4 x i32>, <4 x i32>* %a0
|
%1 = load <4 x i32>, <4 x i32>* %a0
|
||||||
%2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
|
%2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
|
||||||
|
@ -91,12 +91,12 @@ define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
|
||||||
; X32-LABEL: commute_fold_vpcomuq:
|
; X32-LABEL: commute_fold_vpcomuq:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vpcomnequq (%eax), %xmm0, %xmm0
|
; X32-NEXT: vpcomneqq (%eax), %xmm0, %xmm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: commute_fold_vpcomuq:
|
; X64-LABEL: commute_fold_vpcomuq:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vpcomnequq (%rdi), %xmm0, %xmm0
|
; X64-NEXT: vpcomneqq (%rdi), %xmm0, %xmm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = load <2 x i64>, <2 x i64>* %a0
|
%1 = load <2 x i64>, <2 x i64>* %a0
|
||||||
%2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
|
%2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
|
||||||
|
@ -107,13 +107,12 @@ declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readn
|
||||||
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
|
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
|
||||||
; X32-LABEL: commute_fold_vpcomuw:
|
; X32-LABEL: commute_fold_vpcomuw:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||||
; X32-NEXT: vpcomfalseuw (%eax), %xmm0, %xmm0
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: commute_fold_vpcomuw:
|
; X64-LABEL: commute_fold_vpcomuw:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vpcomfalseuw (%rdi), %xmm0, %xmm0
|
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = load <8 x i16>, <8 x i16>* %a0
|
%1 = load <8 x i16>, <8 x i16>* %a0
|
||||||
%2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
|
%2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
|
||||||
|
@ -124,13 +123,12 @@ declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readn
|
||||||
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
|
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
|
||||||
; X32-LABEL: commute_fold_vpcomw:
|
; X32-LABEL: commute_fold_vpcomw:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; X32-NEXT: vpcomtruew (%eax), %xmm0, %xmm0
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: commute_fold_vpcomw:
|
; X64-LABEL: commute_fold_vpcomw:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vpcomtruew (%rdi), %xmm0, %xmm0
|
; X64-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = load <8 x i16>, <8 x i16>* %a0
|
%1 = load <8 x i16>, <8 x i16>* %a0
|
||||||
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
|
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
|
||||||
|
|
|
@ -726,6 +726,86 @@ define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
|
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomb:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomw:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomd:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomq:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
|
||||||
|
ret <2 x i64> %res
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomub:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomud:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||||
|
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
|
||||||
|
ret <2 x i64> %res
|
||||||
|
}
|
||||||
|
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||||
|
|
||||||
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
|
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcmov:
|
; CHECK-LABEL: test_int_x86_xop_vpcmov:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
|
|
|
@ -663,84 +663,3 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
|
||||||
ret <8 x float> %res
|
ret <8 x float> %res
|
||||||
}
|
}
|
||||||
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomb:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
|
|
||||||
ret <16 x i8> %res
|
|
||||||
}
|
|
||||||
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomw:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
|
|
||||||
ret <8 x i16> %res
|
|
||||||
}
|
|
||||||
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomd:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
|
|
||||||
ret <4 x i32> %res
|
|
||||||
}
|
|
||||||
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomq:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
|
|
||||||
ret <2 x i64> %res
|
|
||||||
}
|
|
||||||
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomub:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
|
|
||||||
ret <16 x i8> %res
|
|
||||||
}
|
|
||||||
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
|
|
||||||
ret <8 x i16> %res
|
|
||||||
}
|
|
||||||
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomud:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
|
|
||||||
ret <4 x i32> %res
|
|
||||||
}
|
|
||||||
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
|
||||||
|
|
||||||
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
||||||
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
|
|
||||||
; CHECK: # %bb.0:
|
|
||||||
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: retq
|
|
||||||
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
|
|
||||||
ret <2 x i64> %res
|
|
||||||
}
|
|
||||||
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue