forked from OSchip/llvm-project
[InstCombine][X86] Add MULDQ/MULUDQ undef handling
llvm-svn: 292627
This commit is contained in:
parent
f5677329a6
commit
a50a93fcd0
|
@ -510,6 +510,18 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
|
|||
return Builder.CreateAShr(Vec, ShiftVec);
|
||||
}
|
||||
|
||||
/// Attempt to fold an X86 muldq/muludq intrinsic call into a simpler value.
///
/// The only fold performed here is muldq/muludq(undef, undef) -> undef.
///
/// \param II the intrinsic call whose two operands are inspected.
/// \returns an UndefValue of the call's result type when both operands are
///          UndefValue; nullptr when no simplification applies.
static Value *simplifyX86muldq(const IntrinsicInst &II) {
  const bool LHSIsUndef = isa<UndefValue>(II.getArgOperand(0));
  const bool RHSIsUndef = isa<UndefValue>(II.getArgOperand(1));

  // A single undef operand is not enough: the fold needs both to be undef.
  if (!LHSIsUndef || !RHSIsUndef)
    return nullptr;

  return UndefValue::get(II.getType());
}
|
||||
|
||||
static Value *simplifyX86movmsk(const IntrinsicInst &II,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
Value *Arg = II.getArgOperand(0);
|
||||
|
@ -2142,6 +2154,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
case Intrinsic::x86_avx2_pmulu_dq:
|
||||
case Intrinsic::x86_avx512_pmul_dq_512:
|
||||
case Intrinsic::x86_avx512_pmulu_dq_512: {
|
||||
if (Value *V = simplifyX86muldq(*II))
|
||||
return replaceInstUsesWith(*II, V);
|
||||
|
||||
unsigned VWidth = II->getType()->getVectorNumElements();
|
||||
APInt UndefElts(VWidth, 0);
|
||||
APInt DemandedElts = APInt::getAllOnesValue(VWidth);
|
||||
|
|
|
@ -1469,6 +1469,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||
Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
|
||||
|
||||
// Output elements are undefined if both are undefined. Consider things
|
||||
// like undef*0. The result is known zero, not undef.
|
||||
for (unsigned i = 0; i != VWidth; ++i)
|
||||
if (UndefElts2[i * 2] && UndefElts3[i * 2])
|
||||
UndefElts.setBit(i);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,7 @@
|
|||
|
||||
define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_pmuludq_128(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <2 x i64> undef
|
||||
;
|
||||
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
|
||||
ret <2 x i64> %1
|
||||
|
@ -16,8 +15,7 @@ define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
|
||||
define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_pmuludq_256(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
|
||||
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <4 x i64> undef
|
||||
;
|
||||
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
|
||||
ret <4 x i64> %1
|
||||
|
@ -25,8 +23,7 @@ define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
|
||||
define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_pmuludq_512(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
|
||||
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <8 x i64> undef
|
||||
;
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
|
||||
ret <8 x i64> %1
|
||||
|
@ -34,8 +31,7 @@ define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
|
|||
|
||||
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_pmuldq_128(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <2 x i64> undef
|
||||
;
|
||||
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
|
||||
ret <2 x i64> %1
|
||||
|
@ -43,8 +39,7 @@ define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
|
||||
define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_pmuldq_256(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
|
||||
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <4 x i64> undef
|
||||
;
|
||||
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
|
||||
ret <4 x i64> %1
|
||||
|
@ -52,13 +47,66 @@ define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
|
||||
; 512-bit pmuldq with both operands undef: the call is expected to fold away,
; leaving a direct return of undef.
define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_512(
; CHECK-NEXT: ret <8 x i64> undef
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
  ret <8 x i64> %1
}
|
||||
|
||||
; 128-bit pmuludq with an undef first operand and an all-zero second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuludq_128(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>)
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
; 256-bit pmuludq with an all-zero first operand and an undef second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuludq_256(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> undef)
|
||||
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
|
||||
ret <4 x i64> %1
|
||||
}
|
||||
|
||||
; 512-bit pmuludq with an undef first operand and an all-zero second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuludq_512(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
|
||||
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
|
||||
ret <8 x i64> %1
|
||||
}
|
||||
|
||||
; 128-bit pmuldq with an all-zero first operand and an undef second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuldq_128(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 0, i32 undef, i32 0, i32 undef>, <4 x i32> undef)
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
; 256-bit pmuldq with an undef first operand and an all-zero second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuldq_256(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
|
||||
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
|
||||
ret <4 x i64> %1
|
||||
}
|
||||
|
||||
; 512-bit pmuldq with an all-zero first operand and an undef second operand.
; The expectations below require the call to remain in the output (it is not
; folded to undef); only the odd lanes of the zero vector become undef.
define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
; CHECK-LABEL: @undef_zero_pmuldq_512(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <16 x i32> undef)
|
||||
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
|
||||
ret <8 x i64> %1
|
||||
}
|
||||
|
||||
;
|
||||
; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue