forked from OSchip/llvm-project
[AArch64] Basic demand elements for some intrinsics
A lot of NEON intrinsics work lane-wise, meaning that elements which are not demanded in the output are not demanded in the input either. This teaches that to AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic for some simple single-input truncate intrinsics, which can help remove unnecessary instructions. Differential Revision: https://reviews.llvm.org/D117097
This commit is contained in:
parent
4f19bb6f28
commit
61888d97f6
|
@ -1168,6 +1168,32 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
|||
return None;
|
||||
}
|
||||
|
||||
/// Demanded-vector-elements hook for AArch64 intrinsics.
///
/// For the narrowing NEON intrinsics enumerated below, the demanded-element
/// mask of the call is passed on unchanged to operand 0 through
/// \p SimplifyAndSetOp, with \p UndefElts as the output mask. Every other
/// intrinsic is left untouched.
///
/// \p IC, \p UndefElts2 and \p UndefElts3 are not used by this implementation.
///
/// \returns None in all cases; this hook never supplies a replacement value.
Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  // Classify the intrinsic first; the actual simplification request is issued
  // in one place below.
  bool ForwardMaskToOperand0 = false;
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    ForwardMaskToOperand0 = true;
    break;
  default:
    break;
  }

  // The same lanes are demanded of the vector operand as of the result.
  if (ForwardMaskToOperand0)
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return None;
}
|
||||
|
||||
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
|
||||
ArrayRef<const Value *> Args) {
|
||||
|
||||
|
|
|
@ -106,6 +106,12 @@ public:
|
|||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const;
|
||||
|
||||
/// Demanded-vector-elements hook for AArch64 intrinsics (defined out of
/// line). For a set of narrowing NEON intrinsics the definition forwards
/// DemandedElts to operand 0 of II through SimplifyAndSetOp; it returns None
/// in every case.
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
SimplifyAndSetOp) const;
|
||||
|
||||
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
|
||||
switch (K) {
|
||||
case TargetTransformInfo::RGK_Scalar:
|
||||
|
|
|
@ -3,8 +3,7 @@
|
|||
|
||||
define <2 x float> @fcvtxn(<2 x double> %d1) {
|
||||
; CHECK-LABEL: @fcvtxn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[D1:%.*]], <2 x double> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[A]])
|
||||
; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[D1:%.*]])
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[I]], <2 x float> undef, <2 x i32> <i32 0, i32 undef>
|
||||
; CHECK-NEXT: ret <2 x float> [[S]]
|
||||
;
|
||||
|
@ -16,7 +15,7 @@ define <2 x float> @fcvtxn(<2 x double> %d1) {
|
|||
|
||||
define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @rshrn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -29,7 +28,7 @@ define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqrshrn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -42,7 +41,7 @@ define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqrshrun(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -55,7 +54,7 @@ define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqshrn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -68,7 +67,7 @@ define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqshrun(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -81,7 +80,7 @@ define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqxtn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -94,7 +93,7 @@ define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @sqxtun(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -107,7 +106,7 @@ define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @uqrshrn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -120,7 +119,7 @@ define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @uqshrn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[A]], i32 9)
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
@ -133,7 +132,7 @@ define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) {
|
|||
|
||||
define <4 x i16> @uqxtn(<2 x i32> %d1, <2 x i32> %d2) {
|
||||
; CHECK-LABEL: @uqxtn(
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> [[D2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
|
||||
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i16> [[S]]
|
||||
|
|
Loading…
Reference in New Issue