forked from OSchip/llvm-project
[InstCombine][X86][XOP] Combine XOP integer vector comparisons to native IR
We now have lowering support for XOP PCOM/PCOMU instructions. llvm-svn: 249977
This commit is contained in:
parent
52d47e5704
commit
1d1c56e2df
|
@ -446,6 +446,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// Decode XOP integer vector comparison intrinsics.
|
||||
static Value *SimplifyX86vpcom(const IntrinsicInst &II,
|
||||
InstCombiner::BuilderTy &Builder, bool IsSigned) {
|
||||
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
|
||||
uint64_t Imm = CInt->getZExtValue() & 0x7;
|
||||
VectorType *VecTy = cast<VectorType>(II.getType());
|
||||
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
|
||||
|
||||
switch (Imm) {
|
||||
case 0x0:
|
||||
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
|
||||
break;
|
||||
case 0x1:
|
||||
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
|
||||
break;
|
||||
case 0x2:
|
||||
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
|
||||
break;
|
||||
case 0x3:
|
||||
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
|
||||
break;
|
||||
case 0x4:
|
||||
Pred = ICmpInst::ICMP_EQ; break;
|
||||
case 0x5:
|
||||
Pred = ICmpInst::ICMP_NE; break;
|
||||
case 0x6:
|
||||
return ConstantInt::getSigned(VecTy, 0); // FALSE
|
||||
case 0x7:
|
||||
return ConstantInt::getSigned(VecTy, -1); // TRUE
|
||||
}
|
||||
|
||||
if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0), II.getArgOperand(1)))
|
||||
return Builder.CreateSExtOrTrunc(Cmp, VecTy);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// visitCallInst - CallInst simplification. This mostly only handles folding
|
||||
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
|
||||
/// the heavy lifting.
|
||||
|
@ -1252,6 +1289,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
return ReplaceInstUsesWith(*II, V);
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_xop_vpcomb:
|
||||
case Intrinsic::x86_xop_vpcomd:
|
||||
case Intrinsic::x86_xop_vpcomq:
|
||||
case Intrinsic::x86_xop_vpcomw:
|
||||
if (Value *V = SimplifyX86vpcom(*II, *Builder, true))
|
||||
return ReplaceInstUsesWith(*II, V);
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_xop_vpcomub:
|
||||
case Intrinsic::x86_xop_vpcomud:
|
||||
case Intrinsic::x86_xop_vpcomuq:
|
||||
case Intrinsic::x86_xop_vpcomuw:
|
||||
if (Value *V = SimplifyX86vpcom(*II, *Builder, false))
|
||||
return ReplaceInstUsesWith(*II, V);
|
||||
break;
|
||||
|
||||
case Intrinsic::ppc_altivec_vperm:
|
||||
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
||||
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64
; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  ; vpcomltq = signed PCOM, imm 0x0 (LT): expect fold to icmp slt + sext.
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}
|
||||
|
||||
define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64
; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  ; vpcomltuq = unsigned PCOMU, imm 0x0 (LT): expect fold to icmp ult + sext.
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}
|
||||
|
||||
define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64
; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  ; vpcomleq = signed PCOM, imm 0x1 (LE): expect fold to icmp sle + sext.
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}
|
||||
|
||||
define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64
; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  ; vpcomleuq = unsigned PCOMU, imm 0x1 (LE): expect fold to icmp ule + sext.
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}
|
||||
|
||||
define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32
; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  ; vpcomgtd = signed PCOM, imm 0x2 (GT): expect fold to icmp sgt + sext.
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}
|
||||
|
||||
define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32
; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  ; vpcomgtud = unsigned PCOMU, imm 0x2 (GT): expect fold to icmp ugt + sext.
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}
|
||||
|
||||
define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32
; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  ; vpcomged = signed PCOM, imm 0x3 (GE): expect fold to icmp sge + sext.
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}
|
||||
|
||||
define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32
; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  ; vpcomgeud = unsigned PCOMU, imm 0x3 (GE): expect fold to icmp uge + sext.
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}
|
||||
|
||||
define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16
; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  ; vpcomeqw = signed PCOM, imm 0x4 (EQ): equality is signedness-independent.
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
|
||||
|
||||
define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16
; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  ; vpcomequw = unsigned PCOMU, imm 0x4 (EQ): same icmp eq as the signed form.
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
|
||||
|
||||
define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16
; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  ; vpcomnew = signed PCOM, imm 0x5 (NE): inequality is signedness-independent.
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
|
||||
|
||||
define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16
; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  ; vpcomneuw = unsigned PCOMU, imm 0x5 (NE): same icmp ne as the signed form.
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
|
||||
|
||||
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; vpcomtrueb = signed PCOM, imm 0x7 (TRUE): folds directly to all-ones.
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
|
||||
|
||||
define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; vpcomtrueub = unsigned PCOMU, imm 0x7 (TRUE): folds directly to all-ones.
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
|
||||
|
||||
define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8
; CHECK-NEXT: ret <16 x i8> zeroinitializer
  ; vpcomfalseb = signed PCOM, imm 0x6 (FALSE): folds directly to zero.
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
|
||||
|
||||
define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8
; CHECK-NEXT: ret <16 x i8> zeroinitializer
  ; vpcomfalseub = unsigned PCOMU, imm 0x6 (FALSE): folds directly to zero.
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
|
||||
|
||||
; Declarations for every XOP PCOM/PCOMU intrinsic exercised above, grouped by
; comparison mode (signed forms first, then the unsigned "u" forms).

; imm 0x0 - LT
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x1 - LE
declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x2 - GT
declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x3 - GE
declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x4 - EQ
declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x5 - NE
declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x6 - FALSE
declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

; imm 0x7 - TRUE
declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
|
Loading…
Reference in New Issue