forked from OSchip/llvm-project
SimplifyDemandedVectorElts for all intrinsics
The point is that this simplifies integration of new intrinsics into SimplifyDemandedVectorElts, and ensures we don't miss any existing ones. This is intended to be NFC-ish (no functional change), but as seen from the diffs, it can produce slightly different output. This is due to the order of transforms within instcombine resulting in two slightly different fixed points. That's something we should fix, but it isn't a problem with this patch per se. Differential Revision: https://reviews.llvm.org/D57398 llvm-svn: 352653
This commit is contained in:
parent
3d2c8aaf46
commit
c71e996aed
|
@ -1868,6 +1868,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
if (Changed) return II;
|
||||
}
|
||||
|
||||
// For vector result intrinsics, use the generic demanded vector support to
|
||||
// simplify any operands before moving on to the per-intrinsic rules.
|
||||
if (II->getType()->isVectorTy()) {
|
||||
auto VWidth = II->getType()->getVectorNumElements();
|
||||
APInt UndefElts(VWidth, 0);
|
||||
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
||||
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
|
||||
if (V != II)
|
||||
return replaceInstUsesWith(*II, V);
|
||||
return II;
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
|
||||
return I;
|
||||
|
||||
|
@ -2666,41 +2679,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
return replaceInstUsesWith(*II, V);
|
||||
}
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
break;
|
||||
|
||||
// X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
|
||||
case Intrinsic::x86_avx512_mask_max_ss_round:
|
||||
case Intrinsic::x86_avx512_mask_min_ss_round:
|
||||
case Intrinsic::x86_avx512_mask_max_sd_round:
|
||||
case Intrinsic::x86_avx512_mask_min_sd_round:
|
||||
case Intrinsic::x86_sse_cmp_ss:
|
||||
case Intrinsic::x86_sse_min_ss:
|
||||
case Intrinsic::x86_sse_max_ss:
|
||||
case Intrinsic::x86_sse2_cmp_sd:
|
||||
case Intrinsic::x86_sse2_min_sd:
|
||||
case Intrinsic::x86_sse2_max_sd:
|
||||
case Intrinsic::x86_xop_vfrcz_ss:
|
||||
case Intrinsic::x86_xop_vfrcz_sd: {
|
||||
unsigned VWidth = II->getType()->getVectorNumElements();
|
||||
APInt UndefElts(VWidth, 0);
|
||||
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
||||
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
|
||||
if (V != II)
|
||||
return replaceInstUsesWith(*II, V);
|
||||
return II;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Intrinsic::x86_sse41_round_ss:
|
||||
case Intrinsic::x86_sse41_round_sd: {
|
||||
unsigned VWidth = II->getType()->getVectorNumElements();
|
||||
APInt UndefElts(VWidth, 0);
|
||||
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
||||
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
|
||||
if (V != II)
|
||||
return replaceInstUsesWith(*II, V);
|
||||
return II;
|
||||
} else if (Value *V = simplifyX86round(*II, Builder))
|
||||
if (Value *V = simplifyX86round(*II, Builder))
|
||||
return replaceInstUsesWith(*II, V);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>,
|
|||
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_add_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP4]]
|
||||
|
@ -38,7 +38,7 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
|
@ -149,7 +149,7 @@ declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>,
|
|||
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_sub_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP4]]
|
||||
|
@ -180,7 +180,7 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
|
@ -291,7 +291,7 @@ declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>,
|
|||
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_mul_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP4]]
|
||||
|
@ -322,7 +322,7 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
|
@ -433,7 +433,7 @@ declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>,
|
|||
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_div_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP4]]
|
||||
|
@ -464,7 +464,7 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
|||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
|
|
Loading…
Reference in New Issue